1 // SPDX-License-Identifier: MIT
3 * Copyright © 2014 Intel Corporation
6 #include <linux/circ_buf.h>
8 #include "gem/i915_gem_context.h"
9 #include "gt/gen8_engine_cs.h"
10 #include "gt/intel_breadcrumbs.h"
11 #include "gt/intel_context.h"
12 #include "gt/intel_engine_heartbeat.h"
13 #include "gt/intel_engine_pm.h"
14 #include "gt/intel_engine_regs.h"
15 #include "gt/intel_gpu_commands.h"
16 #include "gt/intel_gt.h"
17 #include "gt/intel_gt_clock_utils.h"
18 #include "gt/intel_gt_irq.h"
19 #include "gt/intel_gt_pm.h"
20 #include "gt/intel_gt_regs.h"
21 #include "gt/intel_gt_requests.h"
22 #include "gt/intel_lrc.h"
23 #include "gt/intel_lrc_reg.h"
24 #include "gt/intel_mocs.h"
25 #include "gt/intel_ring.h"
27 #include "intel_guc_ads.h"
28 #include "intel_guc_capture.h"
29 #include "intel_guc_submission.h"
32 #include "i915_trace.h"
35 * DOC: GuC-based command submission
37 * The Scratch registers:
38 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
39 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
40 * triggers an interrupt on the GuC via another register write (0xC4C8).
41 * Firmware writes a success/fail code back to the action register after
42 * processing the request. The kernel driver polls waiting for this update and
43 * then processes the response.
45 * Command Transport buffers (CTBs):
46 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
47 * - G2H) are a message interface between the i915 and GuC.
49 * Context registration:
50 * Before a context can be submitted it must be registered with the GuC via a
51 * H2G. A unique guc_id is associated with each context. The context is either
52 * registered at request creation time (normal operation) or at submission time
53 * (abnormal operation, e.g. after a reset).
54 *
55 * Context submission:
56 * The i915 updates the LRC tail value in memory. The i915 must enable the
57 * scheduling of the context within the GuC for the GuC to actually consider it.
58 * Therefore, the first time a disabled context is submitted we use a schedule
59 * enable H2G, while follow up submissions are done via the context submit H2G,
60 * which informs the GuC that a previously enabled context has new work
61 * available.
62 *
63 * Context unpin:
64 * To unpin a context a H2G is used to disable scheduling. When the
65 * corresponding G2H returns indicating the scheduling disable operation has
66 * completed it is safe to unpin the context. While a disable is in flight it
67 * isn't safe to resubmit the context so a fence is used to stall all future
68 * requests of that context until the G2H is returned.
70 * Context deregistration:
71 * Before a context can be destroyed or if we steal its guc_id we must
72 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
73 * safe to submit anything to this guc_id until the deregister completes so a
74 * fence is used to stall all requests associated with this guc_id until the
75 * corresponding G2H returns indicating the guc_id has been deregistered.
77 * submission_state.guc_ids:
78 * Unique number associated with private GuC context data passed in during
79 * context registration / submission / deregistration. 64k available. Simple ida
80 * is used for allocation.
81 *
82 * Stealing guc_ids:
83 * If no guc_ids are available they can be stolen from another context at
84 * request creation time if that context is unpinned. If a guc_id can't be found
85 * we punt this problem to the user as we believe this is near impossible to hit
86 * during normal use cases.
87 *
88 * Locks:
89 * In the GuC submission code we have 3 basic spin locks which protect
90 * everything. Details about each below.
91 *
92 * sched_engine->lock
93 * This is the submission lock for all contexts that share an i915 schedule
94 * engine (sched_engine), thus only one of the contexts which share a
95 * sched_engine can be submitting at a time. Currently only one sched_engine is
96 * used for all of GuC submission but that could change in the future.
98 * guc->submission_state.lock
99 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
100 * list.
101 *
102 * ce->guc_state.lock
103 * Protects everything under ce->guc_state. Ensures that a context is in the
104 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
105 * on a disabled context (bad idea), we don't issue a schedule enable when a
106 * schedule disable is in flight, etc... Also protects list of inflight requests
107 * on the context and the priority management state. Lock is individual to each
108 * context.
110 * Lock ordering rules:
111 * sched_engine->lock -> ce->guc_state.lock
112 * guc->submission_state.lock -> ce->guc_state.lock
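 *
 * For example, a minimal sketch of the nesting used further below (see
 * __unwind_incomplete_requests() and guc_cancel_context_requests()):
 *
 *	spin_lock_irqsave(&sched_engine->lock, flags);
 *	spin_lock(&ce->guc_state.lock);
 *	... update per-context submission state / request lists ...
 *	spin_unlock(&ce->guc_state.lock);
 *	spin_unlock_irqrestore(&sched_engine->lock, flags);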
113 *
114 * Reset races:
115 * When a full GT reset is triggered it is assumed that some G2H responses to
116 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
117 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
118 * contexts, release guc_ids, etc...). When this occurs we can scrub the
119 * context state and clean up appropriately, however this is quite racy.
120 * To avoid races, the reset code must disable submission before scrubbing for
121 * the missing G2H, while the submission code must check for submission being
122 * disabled and skip sending H2Gs and updating context states when it is. Both
123 * sides must also make sure to hold the relevant locks.
126 /* GuC Virtual Engine */
127 struct guc_virtual_engine {
128 struct intel_engine_cs base;
129 struct intel_context context;
132 static struct intel_context *
133 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
134 unsigned long flags);
136 static struct intel_context *
137 guc_create_parallel(struct intel_engine_cs **engines,
138 unsigned int num_siblings,
141 #define GUC_REQUEST_SIZE 64 /* bytes */
144 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
145 * per the GuC submission interface. A different allocation algorithm is used
146 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
147 * partition the guc_id space. We believe the number of multi-lrc contexts in
148 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
149 * multi-lrc.
150 */
151 #define NUMBER_MULTI_LRC_GUC_ID(guc) \
152 ((guc)->submission_state.num_guc_ids / 16)
155 * Below is a set of functions which control the GuC scheduling state which
156 * require a lock.
157 */
158 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
159 #define SCHED_STATE_DESTROYED BIT(1)
160 #define SCHED_STATE_PENDING_DISABLE BIT(2)
161 #define SCHED_STATE_BANNED BIT(3)
162 #define SCHED_STATE_ENABLED BIT(4)
163 #define SCHED_STATE_PENDING_ENABLE BIT(5)
164 #define SCHED_STATE_REGISTERED BIT(6)
165 #define SCHED_STATE_POLICY_REQUIRED BIT(7)
166 #define SCHED_STATE_BLOCKED_SHIFT 8
167 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
168 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
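/*
 * SCHED_STATE_BLOCKED is a count rather than a single flag: it occupies
 * bits [8..19] of sched_state and is incremented / decremented by
 * incr_context_blocked() / decr_context_blocked() below, so block requests
 * on the same context can nest.
 */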
170 static inline void init_sched_state(struct intel_context *ce)
172 lockdep_assert_held(&ce->guc_state.lock);
173 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
177 static bool sched_state_is_init(struct intel_context *ce)
179 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
180 return !(ce->guc_state.sched_state &
181 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
185 context_wait_for_deregister_to_register(struct intel_context *ce)
187 return ce->guc_state.sched_state &
188 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
192 set_context_wait_for_deregister_to_register(struct intel_context *ce)
194 lockdep_assert_held(&ce->guc_state.lock);
195 ce->guc_state.sched_state |=
196 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
200 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
202 lockdep_assert_held(&ce->guc_state.lock);
203 ce->guc_state.sched_state &=
204 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
208 context_destroyed(struct intel_context *ce)
210 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
214 set_context_destroyed(struct intel_context *ce)
216 lockdep_assert_held(&ce->guc_state.lock);
217 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
220 static inline bool context_pending_disable(struct intel_context *ce)
222 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
225 static inline void set_context_pending_disable(struct intel_context *ce)
227 lockdep_assert_held(&ce->guc_state.lock);
228 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
231 static inline void clr_context_pending_disable(struct intel_context *ce)
233 lockdep_assert_held(&ce->guc_state.lock);
234 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
237 static inline bool context_banned(struct intel_context *ce)
239 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
242 static inline void set_context_banned(struct intel_context *ce)
244 lockdep_assert_held(&ce->guc_state.lock);
245 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
248 static inline void clr_context_banned(struct intel_context *ce)
250 lockdep_assert_held(&ce->guc_state.lock);
251 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
254 static inline bool context_enabled(struct intel_context *ce)
256 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
259 static inline void set_context_enabled(struct intel_context *ce)
261 lockdep_assert_held(&ce->guc_state.lock);
262 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
265 static inline void clr_context_enabled(struct intel_context *ce)
267 lockdep_assert_held(&ce->guc_state.lock);
268 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
271 static inline bool context_pending_enable(struct intel_context *ce)
273 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
276 static inline void set_context_pending_enable(struct intel_context *ce)
278 lockdep_assert_held(&ce->guc_state.lock);
279 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
282 static inline void clr_context_pending_enable(struct intel_context *ce)
284 lockdep_assert_held(&ce->guc_state.lock);
285 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
288 static inline bool context_registered(struct intel_context *ce)
290 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
293 static inline void set_context_registered(struct intel_context *ce)
295 lockdep_assert_held(&ce->guc_state.lock);
296 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
299 static inline void clr_context_registered(struct intel_context *ce)
301 lockdep_assert_held(&ce->guc_state.lock);
302 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
305 static inline bool context_policy_required(struct intel_context *ce)
307 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
310 static inline void set_context_policy_required(struct intel_context *ce)
312 lockdep_assert_held(&ce->guc_state.lock);
313 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
316 static inline void clr_context_policy_required(struct intel_context *ce)
318 lockdep_assert_held(&ce->guc_state.lock);
319 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
322 static inline u32 context_blocked(struct intel_context *ce)
324 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
325 SCHED_STATE_BLOCKED_SHIFT;
328 static inline void incr_context_blocked(struct intel_context *ce)
330 lockdep_assert_held(&ce->guc_state.lock);
332 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
334 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
337 static inline void decr_context_blocked(struct intel_context *ce)
339 lockdep_assert_held(&ce->guc_state.lock);
341 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
343 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
346 static inline bool context_has_committed_requests(struct intel_context *ce)
348 return !!ce->guc_state.number_committed_requests;
351 static inline void incr_context_committed_requests(struct intel_context *ce)
353 lockdep_assert_held(&ce->guc_state.lock);
354 ++ce->guc_state.number_committed_requests;
355 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
358 static inline void decr_context_committed_requests(struct intel_context *ce)
360 lockdep_assert_held(&ce->guc_state.lock);
361 --ce->guc_state.number_committed_requests;
362 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
365 static struct intel_context *
366 request_to_scheduling_context(struct i915_request *rq)
368 return intel_context_to_parent(rq->context);
371 static inline bool context_guc_id_invalid(struct intel_context *ce)
373 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
376 static inline void set_context_guc_id_invalid(struct intel_context *ce)
378 ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
381 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
383 return &ce->engine->gt->uc.guc;
386 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
388 return rb_entry(rb, struct i915_priolist, node);
392 * When using multi-lrc submission a scratch memory area is reserved in the
393 * parent's context state for the process descriptor, work queue, and handshake
394 * between the parent + children contexts to insert safe preemption points
395 * between each of the BBs. Currently the scratch area is sized to a page.
396 *
397 * The layout of this scratch area is below:
398 * 0					guc_process_desc / guc_sched_wq_desc
399 * + sizeof(struct guc_process_desc)	child go
400 * + CACHELINE_BYTES			child join[0]
401 * ...
402 * + CACHELINE_BYTES			child join[n - 1]
403 * ...
404 * PARENT_SCRATCH_SIZE / 2		work queue start
405 * ...
406 * PARENT_SCRATCH_SIZE - 1		work queue end
407 */
408 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
409 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
411 struct sync_semaphore {
413 u8 unused[CACHELINE_BYTES - sizeof(u32)];
416 struct parent_scratch {
418 struct guc_sched_wq_desc wq_desc;
419 struct guc_process_desc_v69 pdesc;
422 struct sync_semaphore go;
423 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
425 u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
426 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
428 u32 wq[WQ_SIZE / sizeof(u32)];
431 static u32 __get_parent_scratch_offset(struct intel_context *ce)
433 GEM_BUG_ON(!ce->parallel.guc.parent_page);
435 return ce->parallel.guc.parent_page * PAGE_SIZE;
438 static u32 __get_wq_offset(struct intel_context *ce)
440 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
442 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
445 static struct parent_scratch *
446 __get_parent_scratch(struct intel_context *ce)
448 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
449 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
452 * Need to subtract LRC_STATE_OFFSET here as the
453 * parallel.guc.parent_page is the offset into ce->state while
454 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
455 */
456 return (struct parent_scratch *)
458 ((__get_parent_scratch_offset(ce) -
459 LRC_STATE_OFFSET) / sizeof(u32)));
462 static struct guc_process_desc_v69 *
463 __get_process_desc_v69(struct intel_context *ce)
465 struct parent_scratch *ps = __get_parent_scratch(ce);
467 return &ps->descs.pdesc;
470 static struct guc_sched_wq_desc *
471 __get_wq_desc_v70(struct intel_context *ce)
473 struct parent_scratch *ps = __get_parent_scratch(ce);
475 return &ps->descs.wq_desc;
478 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
481 * Check for space in the work queue. Cache a copy of the head pointer in the
482 * intel_context structure in order to reduce the number of accesses to shared
483 * GPU memory which may be across a PCIe bus.
484 */
485 #define AVAILABLE_SPACE \
486 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
487 if (wqi_size > AVAILABLE_SPACE) {
488 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
490 if (wqi_size > AVAILABLE_SPACE)
493 #undef AVAILABLE_SPACE
495 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
498 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
500 struct intel_context *ce = xa_load(&guc->context_lookup, id);
502 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
507 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
509 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
514 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
519 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
524 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
526 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
527 (void **)&guc->lrc_desc_pool_vaddr_v69);
534 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
536 if (!guc->lrc_desc_pool_vaddr_v69)
539 guc->lrc_desc_pool_vaddr_v69 = NULL;
540 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
543 static inline bool guc_submission_initialized(struct intel_guc *guc)
545 return guc->submission_initialized;
548 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
550 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
553 memset(desc, 0, sizeof(*desc));
556 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
558 return __get_context(guc, id);
561 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
562 struct intel_context *ce)
567 * xarray API doesn't have an xa_store_irqsave wrapper, so call the
568 * lower level functions directly.
569 */
570 xa_lock_irqsave(&guc->context_lookup, flags);
571 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
572 xa_unlock_irqrestore(&guc->context_lookup, flags);
575 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
579 if (unlikely(!guc_submission_initialized(guc)))
582 _reset_lrc_desc_v69(guc, id);
585 * xarray API doesn't have an xa_erase_irqsave wrapper, so call
586 * the lower level functions directly.
587 */
588 xa_lock_irqsave(&guc->context_lookup, flags);
589 __xa_erase(&guc->context_lookup, id);
590 xa_unlock_irqrestore(&guc->context_lookup, flags);
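/*
 * Each H2G that expects a G2H reply bumps guc->outstanding_submission_g2h.
 * Drop the count here and wake anyone waiting in
 * intel_guc_wait_for_pending_msg() once it reaches zero.
 */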
593 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
595 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
596 wake_up_all(&guc->ct.wq);
599 static int guc_submission_send_busy_loop(struct intel_guc *guc,
606 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
607 * so we don't handle the case where we don't get a reply because we
608 * aborted the send due to the channel being busy.
610 GEM_BUG_ON(g2h_len_dw && !loop);
613 atomic_inc(&guc->outstanding_submission_g2h);
615 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
618 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
623 const int state = interruptible ?
624 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
628 GEM_BUG_ON(timeout < 0);
630 if (!atomic_read(wait_var))
637 prepare_to_wait(&guc->ct.wq, &wait, state);
639 if (!atomic_read(wait_var))
642 if (signal_pending_state(state, current)) {
652 timeout = io_schedule_timeout(timeout);
654 finish_wait(&guc->ct.wq, &wait);
656 return (timeout < 0) ? timeout : 0;
659 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
661 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
664 return intel_guc_wait_for_pending_msg(guc,
665 &guc->outstanding_submission_g2h,
669 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
670 static int try_context_registration(struct intel_context *ce, bool loop);
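/*
 * Build and send the H2G that makes the GuC act on a new request: a
 * SCHED_CONTEXT_MODE_SET (enable) for a context whose scheduling is not yet
 * enabled, or a plain SCHED_CONTEXT otherwise. Called with the sched_engine
 * lock held.
 */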
672 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
675 struct intel_context *ce = request_to_scheduling_context(rq);
681 lockdep_assert_held(&rq->engine->sched_engine->lock);
684 * Corner case where requests were sitting in the priority list or a
685 * request was resubmitted after the context was banned.
686 */
687 if (unlikely(intel_context_is_banned(ce))) {
688 i915_request_put(i915_request_mark_eio(rq));
689 intel_engine_signal_breadcrumbs(ce->engine);
693 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
694 GEM_BUG_ON(context_guc_id_invalid(ce));
696 if (context_policy_required(ce)) {
697 err = guc_context_policy_init_v70(ce, false);
702 spin_lock(&ce->guc_state.lock);
705 * The request / context will be run on the hardware when scheduling
706 * gets enabled in the unblock. For multi-lrc we still submit the
707 * context to move the LRC tails.
709 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
712 enabled = context_enabled(ce) || context_blocked(ce);
715 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
716 action[len++] = ce->guc_id.id;
717 action[len++] = GUC_CONTEXT_ENABLE;
718 set_context_pending_enable(ce);
719 intel_context_get(ce);
720 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
722 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
723 action[len++] = ce->guc_id.id;
726 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
727 if (!enabled && !err) {
728 trace_intel_context_sched_enable(ce);
729 atomic_inc(&guc->outstanding_submission_g2h);
730 set_context_enabled(ce);
733 * Without multi-lrc KMD does the submission step (moving the
734 * lrc tail) so enabling scheduling is sufficient to submit the
735 * context. This isn't the case in multi-lrc submission as the
736 * GuC needs to move the tails, hence the need for another H2G
737 * to submit a multi-lrc context after enabling scheduling.
739 if (intel_context_is_parent(ce)) {
740 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
741 err = intel_guc_send_nb(guc, action, len - 1, 0);
743 } else if (!enabled) {
744 clr_context_pending_enable(ce);
745 intel_context_put(ce);
748 trace_i915_request_guc_submit(rq);
751 spin_unlock(&ce->guc_state.lock);
755 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
757 int ret = __guc_add_request(guc, rq);
759 if (unlikely(ret == -EBUSY)) {
760 guc->stalled_request = rq;
761 guc->submission_stall_reason = STALL_ADD_REQUEST;
767 static inline void guc_set_lrc_tail(struct i915_request *rq)
769 rq->context->lrc_reg_state[CTX_RING_TAIL] =
770 intel_ring_set_tail(rq->ring, rq->tail);
773 static inline int rq_prio(const struct i915_request *rq)
775 return rq->sched.attr.priority;
778 static bool is_multi_lrc_rq(struct i915_request *rq)
780 return intel_context_is_parallel(rq->context);
783 static bool can_merge_rq(struct i915_request *rq,
784 struct i915_request *last)
786 return request_to_scheduling_context(rq) ==
787 request_to_scheduling_context(last);
790 static u32 wq_space_until_wrap(struct intel_context *ce)
792 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
795 static void write_wqi(struct intel_context *ce, u32 wqi_size)
797 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
800 * Ensure WQIs are visible before updating the tail
801 */
802 intel_guc_write_barrier(ce_to_guc(ce));
804 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
806 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
809 static int guc_wq_noop_append(struct intel_context *ce)
811 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
812 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
817 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
819 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
820 FIELD_PREP(WQ_LEN_MASK, len_dw);
821 ce->parallel.guc.wqi_tail = 0;
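/*
 * Append a multi-lrc work queue item (WQI) for this request. The dwords
 * written below are, in order:
 *	header (WQ_TYPE_MULTI_LRC | len)
 *	parent LRCA
 *	guc_id | parent ring tail
 *	fence_id (currently 0)
 *	one ring tail per child context
 */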
826 static int __guc_wq_item_append(struct i915_request *rq)
828 struct intel_context *ce = request_to_scheduling_context(rq);
829 struct intel_context *child;
830 unsigned int wqi_size = (ce->parallel.number_children + 4) *
833 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
836 /* Ensure context is in the correct state before updating the work queue */
837 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
838 GEM_BUG_ON(context_guc_id_invalid(ce));
839 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
840 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
842 /* Insert NOOP if this work queue item will wrap the tail pointer. */
843 if (wqi_size > wq_space_until_wrap(ce)) {
844 ret = guc_wq_noop_append(ce);
849 wqi = get_wq_pointer(ce, wqi_size);
853 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
855 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
856 FIELD_PREP(WQ_LEN_MASK, len_dw);
857 *wqi++ = ce->lrc.lrca;
858 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
859 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
860 *wqi++ = 0; /* fence_id */
861 for_each_child(ce, child)
862 *wqi++ = child->ring->tail / sizeof(u64);
864 write_wqi(ce, wqi_size);
869 static int guc_wq_item_append(struct intel_guc *guc,
870 struct i915_request *rq)
872 struct intel_context *ce = request_to_scheduling_context(rq);
875 if (likely(!intel_context_is_banned(ce))) {
876 ret = __guc_wq_item_append(rq);
878 if (unlikely(ret == -EBUSY)) {
879 guc->stalled_request = rq;
880 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
887 static bool multi_lrc_submit(struct i915_request *rq)
889 struct intel_context *ce = request_to_scheduling_context(rq);
891 intel_ring_set_tail(rq->ring, rq->tail);
894 * We expect the front end (execbuf IOCTL) to set this flag on the last
895 * request generated from a multi-BB submission. This indicates to the
896 * backend (GuC interface) that we should submit this context thus
897 * submitting all the requests generated in parallel.
899 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
900 intel_context_is_banned(ce);
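/*
 * Pull requests off the priority queue and hand them to the GuC, coalescing
 * requests that belong to the same (parent) scheduling context. The return
 * value tells the tasklet whether to loop and dequeue again; if a H2G could
 * not be sent, the request and a stall reason are recorded for a later retry.
 */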
903 static int guc_dequeue_one_context(struct intel_guc *guc)
905 struct i915_sched_engine * const sched_engine = guc->sched_engine;
906 struct i915_request *last = NULL;
911 lockdep_assert_held(&sched_engine->lock);
913 if (guc->stalled_request) {
915 last = guc->stalled_request;
917 switch (guc->submission_stall_reason) {
918 case STALL_REGISTER_CONTEXT:
919 goto register_context;
920 case STALL_MOVE_LRC_TAIL:
922 case STALL_ADD_REQUEST:
925 MISSING_CASE(guc->submission_stall_reason);
929 while ((rb = rb_first_cached(&sched_engine->queue))) {
930 struct i915_priolist *p = to_priolist(rb);
931 struct i915_request *rq, *rn;
933 priolist_for_each_request_consume(rq, rn, p) {
934 if (last && !can_merge_rq(rq, last))
935 goto register_context;
937 list_del_init(&rq->sched.link);
939 __i915_request_submit(rq);
941 trace_i915_request_in(rq, 0);
944 if (is_multi_lrc_rq(rq)) {
946 * We need to coalesce all multi-lrc requests in
947 * a relationship into a single H2G. We are
948 * guaranteed that all of these requests will be
949 * submitted sequentially.
951 if (multi_lrc_submit(rq)) {
953 goto register_context;
960 rb_erase_cached(&p->node, &sched_engine->queue);
961 i915_priolist_free(p);
966 struct intel_context *ce = request_to_scheduling_context(last);
968 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
969 !intel_context_is_banned(ce))) {
970 ret = try_context_registration(ce, false);
971 if (unlikely(ret == -EPIPE)) {
973 } else if (ret == -EBUSY) {
974 guc->stalled_request = last;
975 guc->submission_stall_reason =
976 STALL_REGISTER_CONTEXT;
977 goto schedule_tasklet;
978 } else if (ret != 0) {
979 GEM_WARN_ON(ret); /* Unexpected */
985 if (is_multi_lrc_rq(last)) {
986 ret = guc_wq_item_append(guc, last);
988 goto schedule_tasklet;
989 } else if (ret != 0) {
990 GEM_WARN_ON(ret); /* Unexpected */
994 guc_set_lrc_tail(last);
998 ret = guc_add_request(guc, last);
999 if (unlikely(ret == -EPIPE)) {
1001 } else if (ret == -EBUSY) {
1002 goto schedule_tasklet;
1003 } else if (ret != 0) {
1004 GEM_WARN_ON(ret); /* Unexpected */
1009 guc->stalled_request = NULL;
1010 guc->submission_stall_reason = STALL_NONE;
1014 sched_engine->tasklet.callback = NULL;
1015 tasklet_disable_nosync(&sched_engine->tasklet);
1019 tasklet_schedule(&sched_engine->tasklet);
1023 static void guc_submission_tasklet(struct tasklet_struct *t)
1025 struct i915_sched_engine *sched_engine =
1026 from_tasklet(sched_engine, t, tasklet);
1027 unsigned long flags;
1030 spin_lock_irqsave(&sched_engine->lock, flags);
1033 loop = guc_dequeue_one_context(sched_engine->private_data);
1036 i915_sched_engine_reset_on_empty(sched_engine);
1038 spin_unlock_irqrestore(&sched_engine->lock, flags);
1041 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1043 if (iir & GT_RENDER_USER_INTERRUPT)
1044 intel_engine_signal_breadcrumbs(engine);
1047 static void __guc_context_destroy(struct intel_context *ce);
1048 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1049 static void guc_signal_context_fence(struct intel_context *ce);
1050 static void guc_cancel_context_requests(struct intel_context *ce);
1051 static void guc_blocked_fence_complete(struct intel_context *ce);
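/*
 * After a full GT reset any outstanding G2H replies will never arrive. Walk
 * every registered context and complete by hand whatever those replies would
 * have done (enable/disable acks, deregister completion, context destroy),
 * see the reset handling notes in the DOC comment above.
 */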
1053 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1055 struct intel_context *ce;
1056 unsigned long index, flags;
1057 bool pending_disable, pending_enable, deregister, destroyed, banned;
1059 xa_lock_irqsave(&guc->context_lookup, flags);
1060 xa_for_each(&guc->context_lookup, index, ce) {
1062 * Corner case where the ref count on the object is zero but the
1063 * deregister G2H was lost. In this case we don't touch the ref
1064 * count and finish the destroy of the context.
1066 bool do_put = kref_get_unless_zero(&ce->ref);
1068 xa_unlock(&guc->context_lookup);
1070 spin_lock(&ce->guc_state.lock);
1073 * Once we are at this point submission_disabled() is guaranteed
1074 * to be visible to all callers who set the below flags (see above
1075 * flush and flushes in reset_prepare). If submission_disabled()
1076 * is set, the caller shouldn't set these flags.
1079 destroyed = context_destroyed(ce);
1080 pending_enable = context_pending_enable(ce);
1081 pending_disable = context_pending_disable(ce);
1082 deregister = context_wait_for_deregister_to_register(ce);
1083 banned = context_banned(ce);
1084 init_sched_state(ce);
1086 spin_unlock(&ce->guc_state.lock);
1088 if (pending_enable || destroyed || deregister) {
1089 decr_outstanding_submission_g2h(guc);
1091 guc_signal_context_fence(ce);
1093 intel_gt_pm_put_async(guc_to_gt(guc));
1094 release_guc_id(guc, ce);
1095 __guc_context_destroy(ce);
1097 if (pending_enable || deregister)
1098 intel_context_put(ce);
1101 /* Not mutually exclusive with the above if statement. */
1102 if (pending_disable) {
1103 guc_signal_context_fence(ce);
1105 guc_cancel_context_requests(ce);
1106 intel_engine_signal_breadcrumbs(ce->engine);
1108 intel_context_sched_disable_unpin(ce);
1109 decr_outstanding_submission_g2h(guc);
1111 spin_lock(&ce->guc_state.lock);
1112 guc_blocked_fence_complete(ce);
1113 spin_unlock(&ce->guc_state.lock);
1115 intel_context_put(ce);
1119 intel_context_put(ce);
1120 xa_lock(&guc->context_lookup);
1122 xa_unlock_irqrestore(&guc->context_lookup, flags);
1126 * GuC stores busyness stats for each engine at context in/out boundaries. A
1127 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1128 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1129 * GuC.
1130 *
1131 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1132 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1133 * active. For an active engine total busyness = total + (now - start), where
1134 * 'now' is the time at which the busyness is sampled. For an inactive engine,
1135 * total busyness = total.
1137 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1139 * The start and total values provided by GuC are 32 bits and wrap around in a
1140 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1141 * increasing ns values, there is a need for this implementation to account for
1142 * overflows and extend the GuC provided values to 64 bits before returning
1143 * busyness to the user. In order to do that, a worker runs periodically at
1144 * intervals of 1/8th the time it takes for the timestamp to wrap (i.e. once in
1145 * 27 seconds for a gt clock frequency of 19.2 MHz).
1148 #define WRAP_TIME_CLKS U32_MAX
1149 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
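/*
 * Extend the 32-bit context switch-in timestamp provided by the GuC to 64 bits
 * using the upper half of the driver maintained gt_stamp, correcting for the
 * case where either value has wrapped since the last update.
 */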
1152 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1154 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1155 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1157 if (new_start == lower_32_bits(*prev_start))
1161 * When gt is unparked, we update the gt timestamp and start the ping
1162 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1163 * is unparked, all switched in contexts will have a start time that is
1164 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1166 * If neither gt_stamp nor new_start has rolled over, then the
1167 * gt_stamp_hi does not need to be adjusted, however if one of them has
1168 * rolled over, we need to adjust gt_stamp_hi accordingly.
1170 * The below conditions address the cases of new_start rollover and
1171 * gt_stamp_last rollover respectively.
1173 if (new_start < gt_stamp_last &&
1174 (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
1177 if (new_start > gt_stamp_last &&
1178 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
1181 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
1184 #define record_read(map_, field_) \
1185 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1188 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1189 * we run into a race where the value read is inconsistent. Sometimes the
1190 * inconsistency is in reading the most significant bytes of the last_in value when
1191 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1192 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1193 * determine validity of these values. Instead we read the values multiple times
1194 * until they are consistent. In test runs, 3 attempts result in consistent
1195 * values. The upper bound is set to 6 attempts and may need to be tuned as per
1196 * any new occurrences.
1197 */
1198 static void __get_engine_usage_record(struct intel_engine_cs *engine,
1199 u32 *last_in, u32 *id, u32 *total)
1201 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1205 *last_in = record_read(&rec_map, last_switch_in_stamp);
1206 *id = record_read(&rec_map, current_context_index);
1207 *total = record_read(&rec_map, total_runtime);
1209 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1210 record_read(&rec_map, current_context_index) == *id &&
1211 record_read(&rec_map, total_runtime) == *total)
1216 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1218 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1219 struct intel_guc *guc = &engine->gt->uc.guc;
1220 u32 last_switch, ctx_id, total;
1222 lockdep_assert_held(&guc->timestamp.lock);
1224 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1226 stats->running = ctx_id != ~0U && last_switch;
1228 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1231 * Instead of adjusting the total for overflow, just add the
1232 * difference from the previous sample to stats->total_gt_clks.
1233 */
1234 if (total && total != ~0U) {
1235 stats->total_gt_clks += (u32)(total - stats->prev_total);
1236 stats->prev_total = total;
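/*
 * Read the CTC shift parameter from RPM_CONFIG0; guc_update_pm_timestamp()
 * shifts the raw GPM timestamp right by this amount before using it as a gt
 * clock value.
 */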
1240 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1242 intel_wakeref_t wakeref;
1245 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1246 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1248 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1249 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
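/*
 * Sample the GPM timestamp (MISC_STATUS0/1), apply the CTC shift, and fold it
 * into the driver's 64-bit gt_stamp, bumping the upper 32 bits when the
 * hardware counter has wrapped since the previous sample.
 */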
1254 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1256 struct intel_gt *gt = guc_to_gt(guc);
1257 u32 gt_stamp_lo, gt_stamp_hi;
1260 lockdep_assert_held(&guc->timestamp.lock);
1262 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1263 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1264 MISC_STATUS1) >> guc->timestamp.shift;
1265 gt_stamp_lo = lower_32_bits(gpm_ts);
1268 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
1271 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1275 * Unlike the execlist mode of submission, total and active times are in terms of
1276 * gt clocks. The *now parameter is retained to return the cpu time at which the
1277 * busyness was sampled.
1279 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1281 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1282 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1283 struct intel_gt *gt = engine->gt;
1284 struct intel_guc *guc = >->uc.guc;
1285 u64 total, gt_stamp_saved;
1286 unsigned long flags;
1290 spin_lock_irqsave(&guc->timestamp.lock, flags);
1293 * If a reset happened, we risk reading partially updated engine
1294 * busyness from GuC, so we just use the driver stored copy of busyness.
1295 * Synchronize with gt reset using reset_count and the
1296 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1297 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1298 * usable by checking the flag afterwards.
1300 reset_count = i915_reset_count(gpu_error);
1301 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags);
1306 * The active busyness depends on start_gt_clk and gt_stamp.
1307 * gt_stamp is updated by i915 only when gt is awake and the
1308 * start_gt_clk is derived from GuC state. To get a consistent
1309 * view of activity, we query the GuC state only if gt is awake.
1311 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1312 stats_saved = *stats;
1313 gt_stamp_saved = guc->timestamp.gt_stamp;
1315 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1316 * start_gt_clk' calculation below for active engines.
1318 guc_update_engine_gt_clks(engine);
1319 guc_update_pm_timestamp(guc, now);
1320 intel_gt_pm_put_async(gt);
1321 if (i915_reset_count(gpu_error) != reset_count) {
1322 *stats = stats_saved;
1323 guc->timestamp.gt_stamp = gt_stamp_saved;
1327 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1328 if (stats->running) {
1329 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1331 total += intel_gt_clock_interval_to_ns(gt, clk);
1334 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1336 return ns_to_ktime(total);
1339 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1341 struct intel_gt *gt = guc_to_gt(guc);
1342 struct intel_engine_cs *engine;
1343 enum intel_engine_id id;
1344 unsigned long flags;
1347 cancel_delayed_work_sync(&guc->timestamp.work);
1349 spin_lock_irqsave(&guc->timestamp.lock, flags);
1351 guc_update_pm_timestamp(guc, &unused);
1352 for_each_engine(engine, gt, id) {
1353 guc_update_engine_gt_clks(engine);
1354 engine->stats.guc.prev_total = 0;
1357 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1360 static void __update_guc_busyness_stats(struct intel_guc *guc)
1362 struct intel_gt *gt = guc_to_gt(guc);
1363 struct intel_engine_cs *engine;
1364 enum intel_engine_id id;
1365 unsigned long flags;
1368 spin_lock_irqsave(&guc->timestamp.lock, flags);
1370 guc_update_pm_timestamp(guc, &unused);
1371 for_each_engine(engine, gt, id)
1372 guc_update_engine_gt_clks(engine);
1374 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
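/*
 * Worker that periodically refreshes gt_stamp and the per-engine busyness so
 * that the 32-bit values provided by the GuC can be extended to 64 bits before
 * they wrap (see the busyness DOC comment above).
 */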
1377 static void guc_timestamp_ping(struct work_struct *wrk)
1379 struct intel_guc *guc = container_of(wrk, typeof(*guc),
1380 timestamp.work.work);
1381 struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1382 struct intel_gt *gt = guc_to_gt(guc);
1383 intel_wakeref_t wakeref;
1387 * Synchronize with gt reset to make sure the worker does not
1388 * corrupt the engine/guc stats.
1390 ret = intel_gt_reset_trylock(gt, &srcu);
1394 with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
1395 __update_guc_busyness_stats(guc);
1397 intel_gt_reset_unlock(gt, srcu);
1399 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1400 guc->timestamp.ping_delay);
1403 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1405 u32 offset = intel_guc_engine_usage_offset(guc);
1407 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1412 return intel_guc_send(guc, action, ARRAY_SIZE(action));
1415 static void guc_init_engine_stats(struct intel_guc *guc)
1417 struct intel_gt *gt = guc_to_gt(guc);
1418 intel_wakeref_t wakeref;
1420 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1421 guc->timestamp.ping_delay);
1423 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) {
1424 int ret = guc_action_enable_usage_stats(guc);
1427 drm_err(>->i915->drm,
1428 "Failed to enable usage stats: %d!\n", ret);
1432 void intel_guc_busyness_park(struct intel_gt *gt)
1434 struct intel_guc *guc = >->uc.guc;
1436 if (!guc_submission_initialized(guc))
1439 cancel_delayed_work(&guc->timestamp.work);
1440 __update_guc_busyness_stats(guc);
1443 void intel_guc_busyness_unpark(struct intel_gt *gt)
1445 struct intel_guc *guc = >->uc.guc;
1446 unsigned long flags;
1449 if (!guc_submission_initialized(guc))
1452 spin_lock_irqsave(&guc->timestamp.lock, flags);
1453 guc_update_pm_timestamp(guc, &unused);
1454 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1455 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1456 guc->timestamp.ping_delay);
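/*
 * Submission is considered disabled while the tasklet is disabled (e.g. a
 * reset is in progress), before the sched_engine exists, or once the GT has
 * been wedged.
 */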
1460 submission_disabled(struct intel_guc *guc)
1462 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1464 return unlikely(!sched_engine ||
1465 !__tasklet_is_enabled(&sched_engine->tasklet) ||
1466 intel_gt_is_wedged(guc_to_gt(guc)));
1469 static void disable_submission(struct intel_guc *guc)
1471 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1473 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1474 GEM_BUG_ON(!guc->ct.enabled);
1475 __tasklet_disable_sync_once(&sched_engine->tasklet);
1476 sched_engine->tasklet.callback = NULL;
1480 static void enable_submission(struct intel_guc *guc)
1482 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1483 unsigned long flags;
1485 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1486 sched_engine->tasklet.callback = guc_submission_tasklet;
1487 wmb(); /* Make sure callback visible */
1488 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1489 __tasklet_enable(&sched_engine->tasklet)) {
1490 GEM_BUG_ON(!guc->ct.enabled);
1492 /* And kick in case we missed a new request submission. */
1493 tasklet_hi_schedule(&sched_engine->tasklet);
1495 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
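/*
 * Taking and immediately releasing the sched_engine lock acts as a barrier:
 * any submission that was in flight under the lock has finished by the time
 * this returns.
 */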
1498 static void guc_flush_submissions(struct intel_guc *guc)
1500 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1501 unsigned long flags;
1503 spin_lock_irqsave(&sched_engine->lock, flags);
1504 spin_unlock_irqrestore(&sched_engine->lock, flags);
1507 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1509 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1511 if (unlikely(!guc_submission_initialized(guc))) {
1512 /* Reset called during driver load? GuC not yet initialised! */
1516 intel_gt_park_heartbeats(guc_to_gt(guc));
1517 disable_submission(guc);
1518 guc->interrupts.disable(guc);
1519 __reset_guc_busyness_stats(guc);
1521 /* Flush IRQ handler */
1522 spin_lock_irq(&guc_to_gt(guc)->irq_lock);
1523 spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
1525 guc_flush_submissions(guc);
1526 guc_flush_destroyed_contexts(guc);
1527 flush_work(&guc->ct.requests.worker);
1529 scrub_guc_desc_for_outstanding_g2h(guc);
1532 static struct intel_engine_cs *
1533 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1535 struct intel_engine_cs *engine;
1536 intel_engine_mask_t tmp, mask = ve->mask;
1537 unsigned int num_siblings = 0;
1539 for_each_engine_masked(engine, ve->gt, mask, tmp)
1540 if (num_siblings++ == sibling)
1546 static inline struct intel_engine_cs *
1547 __context_to_physical_engine(struct intel_context *ce)
1549 struct intel_engine_cs *engine = ce->engine;
1551 if (intel_engine_is_virtual(engine))
1552 engine = guc_virtual_get_sibling(engine, 0);
1557 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1559 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1561 if (intel_context_is_banned(ce))
1564 GEM_BUG_ON(!intel_context_is_pinned(ce));
1567 * We want a simple context + ring to execute the breadcrumb update.
1568 * We cannot rely on the context being intact across the GPU hang,
1569 * so clear it and rebuild just what we need for the breadcrumb.
1570 * All pending requests for this context will be zapped, and any
1571 * future request will be after userspace has had the opportunity
1572 * to recreate its own state.
1575 lrc_init_regs(ce, engine, true);
1577 /* Rerun the request; its payload has been neutered (if guilty). */
1578 lrc_update_regs(ce, engine, head);
1581 static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
1583 static const i915_reg_t _reg[I915_NUM_ENGINES] = {
1584 [RCS0] = MSG_IDLE_CS,
1585 [BCS0] = MSG_IDLE_BCS,
1586 [VCS0] = MSG_IDLE_VCS0,
1587 [VCS1] = MSG_IDLE_VCS1,
1588 [VCS2] = MSG_IDLE_VCS2,
1589 [VCS3] = MSG_IDLE_VCS3,
1590 [VCS4] = MSG_IDLE_VCS4,
1591 [VCS5] = MSG_IDLE_VCS5,
1592 [VCS6] = MSG_IDLE_VCS6,
1593 [VCS7] = MSG_IDLE_VCS7,
1594 [VECS0] = MSG_IDLE_VECS0,
1595 [VECS1] = MSG_IDLE_VECS1,
1596 [VECS2] = MSG_IDLE_VECS2,
1597 [VECS3] = MSG_IDLE_VECS3,
1598 [CCS0] = MSG_IDLE_CS,
1599 [CCS1] = MSG_IDLE_CS,
1600 [CCS2] = MSG_IDLE_CS,
1601 [CCS3] = MSG_IDLE_CS,
1605 if (!_reg[engine->id].reg)
1608 val = intel_uncore_read(engine->uncore, _reg[engine->id]);
1610 /* bits[29:25] & bits[13:9] >> shift */
1611 return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
1614 static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
1618 /* Ensure GPM receives fw up/down after CS is stopped */
1621 /* Wait for forcewake request to complete in GPM */
1622 ret = __intel_wait_for_register_fw(gt->uncore,
1623 GEN9_PWRGT_DOMAIN_STATUS,
1624 fw_mask, fw_mask, 5000, 0, NULL);
1626 /* Ensure CS receives fw ack from GPM */
1630 GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
1634 * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
1635 * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
1636 * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
1637 * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
1638 * are concerned only with the gt reset here, we use a logical OR of pending
1639 * forcewakeups from all reset domains and then wait for them to complete by
1640 * querying PWRGT_DOMAIN_STATUS.
1642 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1646 if (GRAPHICS_VER(engine->i915) != 12)
1651 * TODO: Occasionally trying to stop the cs times out, but does not
1652 * adversely affect functionality. The timeout is set as a config
1653 * parameter that defaults to 100ms. Assuming that this timeout is
1654 * sufficient for any pending MI_FORCEWAKEs to complete, ignore the
1655 * timeout returned here until it is root caused.
1657 intel_engine_stop_cs(engine);
1659 fw_pending = __cs_pending_mi_force_wakes(engine);
1661 __gpm_wait_for_fw_complete(engine->gt, fw_pending);
1664 static void guc_reset_nop(struct intel_engine_cs *engine)
1668 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
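/*
 * Move all incomplete requests of a context off its guc_state.requests list
 * and back onto the sched_engine priority queue so they are resubmitted after
 * a reset.
 */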
1673 __unwind_incomplete_requests(struct intel_context *ce)
1675 struct i915_request *rq, *rn;
1676 struct list_head *pl;
1677 int prio = I915_PRIORITY_INVALID;
1678 struct i915_sched_engine * const sched_engine =
1679 ce->engine->sched_engine;
1680 unsigned long flags;
1682 spin_lock_irqsave(&sched_engine->lock, flags);
1683 spin_lock(&ce->guc_state.lock);
1684 list_for_each_entry_safe_reverse(rq, rn,
1685 &ce->guc_state.requests,
1687 if (i915_request_completed(rq))
1690 list_del_init(&rq->sched.link);
1691 __i915_request_unsubmit(rq);
1693 /* Push the request back into the queue for later resubmission. */
1694 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1695 if (rq_prio(rq) != prio) {
1697 pl = i915_sched_lookup_priolist(sched_engine, prio);
1699 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1701 list_add(&rq->sched.link, pl);
1702 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1704 spin_unlock(&ce->guc_state.lock);
1705 spin_unlock_irqrestore(&sched_engine->lock, flags);
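/*
 * Reset a context and, for parallel submission, all of its children: find the
 * hanging request in each, rebuild the LRC state, then unwind the incomplete
 * requests of the parent for resubmission.
 */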
1708 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
1711 struct i915_request *rq;
1712 unsigned long flags;
1714 int i, number_children = ce->parallel.number_children;
1715 struct intel_context *parent = ce;
1717 GEM_BUG_ON(intel_context_is_child(ce));
1719 intel_context_get(ce);
1722 * GuC will implicitly mark the context as non-schedulable when it sends
1723 * the reset notification. Make sure our state reflects this change. The
1724 * context will be marked enabled on resubmission.
1726 spin_lock_irqsave(&ce->guc_state.lock, flags);
1727 clr_context_enabled(ce);
1728 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1731 * For each context in the relationship, find the hanging request and
1732 * reset each context / request as needed.
1733 */
1734 for (i = 0; i < number_children + 1; ++i) {
1735 if (!intel_context_is_pinned(ce))
1739 rq = intel_context_find_active_request(ce);
1741 head = ce->ring->tail;
1745 if (i915_request_started(rq))
1746 guilty = stalled & ce->engine->mask;
1748 GEM_BUG_ON(i915_active_is_idle(&ce->active));
1749 head = intel_ring_wrap(ce->ring, rq->head);
1751 __i915_request_reset(rq, guilty);
1753 guc_reset_state(ce, head, guilty);
1755 if (i != number_children)
1756 ce = list_next_entry(ce, parallel.child_link);
1759 __unwind_incomplete_requests(parent);
1760 intel_context_put(parent);
1763 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
1765 struct intel_context *ce;
1766 unsigned long index;
1767 unsigned long flags;
1769 if (unlikely(!guc_submission_initialized(guc))) {
1770 /* Reset called during driver load? GuC not yet initialised! */
1774 xa_lock_irqsave(&guc->context_lookup, flags);
1775 xa_for_each(&guc->context_lookup, index, ce) {
1776 if (!kref_get_unless_zero(&ce->ref))
1779 xa_unlock(&guc->context_lookup);
1781 if (intel_context_is_pinned(ce) &&
1782 !intel_context_is_child(ce))
1783 __guc_reset_context(ce, stalled);
1785 intel_context_put(ce);
1787 xa_lock(&guc->context_lookup);
1789 xa_unlock_irqrestore(&guc->context_lookup, flags);
1791 /* GuC is blown away, drop all references to contexts */
1792 xa_destroy(&guc->context_lookup);
1795 static void guc_cancel_context_requests(struct intel_context *ce)
1797 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1798 struct i915_request *rq;
1799 unsigned long flags;
1801 /* Mark all executing requests as skipped. */
1802 spin_lock_irqsave(&sched_engine->lock, flags);
1803 spin_lock(&ce->guc_state.lock);
1804 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1805 i915_request_put(i915_request_mark_eio(rq));
1806 spin_unlock(&ce->guc_state.lock);
1807 spin_unlock_irqrestore(&sched_engine->lock, flags);
1811 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1813 struct i915_request *rq, *rn;
1815 unsigned long flags;
1817 /* Can be called during boot if GuC fails to load */
1822 * Before we call engine->cancel_requests(), we should have exclusive
1823 * access to the submission state. This is arranged for us by the
1824 * caller disabling the interrupt generation, the tasklet and other
1825 * threads that may then access the same state, giving us a free hand
1826 * to reset state. However, we still need to let lockdep be aware that
1827 * we know this state may be accessed in hardirq context, so we
1828 * disable the irq around this manipulation and we want to keep
1829 * the spinlock focused on its duties and not accidentally conflate
1830 * coverage to the submission's irq state. (Similarly, although we
1831 * shouldn't need to disable irq around the manipulation of the
1832 * submission's irq state, we also wish to remind ourselves that
1833 * it is irq state.)
1834 */
1835 spin_lock_irqsave(&sched_engine->lock, flags);
1837 /* Flush the queued requests to the timeline list (for retiring). */
1838 while ((rb = rb_first_cached(&sched_engine->queue))) {
1839 struct i915_priolist *p = to_priolist(rb);
1841 priolist_for_each_request_consume(rq, rn, p) {
1842 list_del_init(&rq->sched.link);
1844 __i915_request_submit(rq);
1846 i915_request_put(i915_request_mark_eio(rq));
1849 rb_erase_cached(&p->node, &sched_engine->queue);
1850 i915_priolist_free(p);
1853 /* Remaining _unready_ requests will be nop'ed when submitted */
1855 sched_engine->queue_priority_hint = INT_MIN;
1856 sched_engine->queue = RB_ROOT_CACHED;
1858 spin_unlock_irqrestore(&sched_engine->lock, flags);
1861 void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1863 struct intel_context *ce;
1864 unsigned long index;
1865 unsigned long flags;
1867 xa_lock_irqsave(&guc->context_lookup, flags);
1868 xa_for_each(&guc->context_lookup, index, ce) {
1869 if (!kref_get_unless_zero(&ce->ref))
1872 xa_unlock(&guc->context_lookup);
1874 if (intel_context_is_pinned(ce) &&
1875 !intel_context_is_child(ce))
1876 guc_cancel_context_requests(ce);
1878 intel_context_put(ce);
1880 xa_lock(&guc->context_lookup);
1882 xa_unlock_irqrestore(&guc->context_lookup, flags);
1884 guc_cancel_sched_engine_requests(guc->sched_engine);
1886 /* GuC is blown away, drop all references to contexts */
1887 xa_destroy(&guc->context_lookup);
1890 void intel_guc_submission_reset_finish(struct intel_guc *guc)
1892 /* Reset called during driver load or during wedge? */
1893 if (unlikely(!guc_submission_initialized(guc) ||
1894 intel_gt_is_wedged(guc_to_gt(guc)))) {
1899 * Technically possible for either of these values to be non-zero here,
1900 * but very unlikely + harmless. Regardless, let's add a warn so we can
1901 * see in CI if this happens frequently / a precursor to taking down the
1902 * machine.
1903 */
1904 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1905 atomic_set(&guc->outstanding_submission_g2h, 0);
1907 intel_guc_global_policies_update(guc);
1908 enable_submission(guc);
1909 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1912 static void destroyed_worker_func(struct work_struct *w);
1913 static void reset_fail_worker_func(struct work_struct *w);
1916 * Set up the memory resources to be shared with the GuC (via the GGTT)
1917 * at firmware loading time.
1919 int intel_guc_submission_init(struct intel_guc *guc)
1921 struct intel_gt *gt = guc_to_gt(guc);
1924 if (guc->submission_initialized)
1927 if (guc->fw.major_ver_found < 70) {
1928 ret = guc_lrc_desc_pool_create_v69(guc);
1933 guc->submission_state.guc_ids_bitmap =
1934 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1935 if (!guc->submission_state.guc_ids_bitmap) {
1940 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1941 guc->timestamp.shift = gpm_timestamp_shift(gt);
1942 guc->submission_initialized = true;
1947 guc_lrc_desc_pool_destroy_v69(guc);
1952 void intel_guc_submission_fini(struct intel_guc *guc)
1954 if (!guc->submission_initialized)
1957 guc_flush_destroyed_contexts(guc);
1958 guc_lrc_desc_pool_destroy_v69(guc);
1959 i915_sched_engine_put(guc->sched_engine);
1960 bitmap_free(guc->submission_state.guc_ids_bitmap);
1961 guc->submission_initialized = false;
1964 static inline void queue_request(struct i915_sched_engine *sched_engine,
1965 struct i915_request *rq,
1968 GEM_BUG_ON(!list_empty(&rq->sched.link));
1969 list_add_tail(&rq->sched.link,
1970 i915_sched_lookup_priolist(sched_engine, prio));
1971 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1972 tasklet_hi_schedule(&sched_engine->tasklet);
1975 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1976 struct i915_request *rq)
1980 __i915_request_submit(rq);
1982 trace_i915_request_in(rq, 0);
1984 if (is_multi_lrc_rq(rq)) {
1985 if (multi_lrc_submit(rq)) {
1986 ret = guc_wq_item_append(guc, rq);
1988 ret = guc_add_request(guc, rq);
1991 guc_set_lrc_tail(rq);
1992 ret = guc_add_request(guc, rq);
1995 if (unlikely(ret == -EPIPE))
1996 disable_submission(guc);
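/*
 * The tasklet path is needed whenever the request cannot be handed straight
 * to the GuC: submission is disabled, a previously stalled request is
 * pending, other requests are already queued, or the context is not yet
 * registered with the GuC.
 */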
2001 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
2003 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2004 struct intel_context *ce = request_to_scheduling_context(rq);
2006 return submission_disabled(guc) || guc->stalled_request ||
2007 !i915_sched_engine_is_empty(sched_engine) ||
2008 !ctx_id_mapped(guc, ce->guc_id.id);
2011 static void guc_submit_request(struct i915_request *rq)
2013 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2014 struct intel_guc *guc = &rq->engine->gt->uc.guc;
2015 unsigned long flags;
2017 /* Will be called from irq-context when using foreign fences. */
2018 spin_lock_irqsave(&sched_engine->lock, flags);
2020 if (need_tasklet(guc, rq))
2021 queue_request(sched_engine, rq, rq_prio(rq));
2022 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
2023 tasklet_hi_schedule(&sched_engine->tasklet);
2025 spin_unlock_irqrestore(&sched_engine->lock, flags);
2028 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
2032 GEM_BUG_ON(intel_context_is_child(ce));
2034 if (intel_context_is_parent(ce))
2035 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
2036 NUMBER_MULTI_LRC_GUC_ID(guc),
2037 order_base_2(ce->parallel.number_children
2040 ret = ida_simple_get(&guc->submission_state.guc_ids,
2041 NUMBER_MULTI_LRC_GUC_ID(guc),
2042 guc->submission_state.num_guc_ids,
2043 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
2045 if (unlikely(ret < 0))
2048 ce->guc_id.id = ret;
2052 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2054 GEM_BUG_ON(intel_context_is_child(ce));
2056 if (!context_guc_id_invalid(ce)) {
2057 if (intel_context_is_parent(ce))
2058 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2060 order_base_2(ce->parallel.number_children
2063 ida_simple_remove(&guc->submission_state.guc_ids,
2065 clr_ctx_id_mapping(guc, ce->guc_id.id);
2066 set_context_guc_id_invalid(ce);
2068 if (!list_empty(&ce->guc_id.link))
2069 list_del_init(&ce->guc_id.link);
2072 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2074 unsigned long flags;
2076 spin_lock_irqsave(&guc->submission_state.lock, flags);
2077 __release_guc_id(guc, ce);
2078 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2081 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2083 struct intel_context *cn;
2085 lockdep_assert_held(&guc->submission_state.lock);
2086 GEM_BUG_ON(intel_context_is_child(ce));
2087 GEM_BUG_ON(intel_context_is_parent(ce));
2089 if (!list_empty(&guc->submission_state.guc_id_list)) {
2090 cn = list_first_entry(&guc->submission_state.guc_id_list,
2091 struct intel_context,
2094 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2095 GEM_BUG_ON(context_guc_id_invalid(cn));
2096 GEM_BUG_ON(intel_context_is_child(cn));
2097 GEM_BUG_ON(intel_context_is_parent(cn));
2099 list_del_init(&cn->guc_id.link);
2100 ce->guc_id.id = cn->guc_id.id;
2102 spin_lock(&cn->guc_state.lock);
2103 clr_context_registered(cn);
2104 spin_unlock(&cn->guc_state.lock);
2106 set_context_guc_id_invalid(cn);
2108 #ifdef CONFIG_DRM_I915_SELFTEST
2109 guc->number_guc_id_stolen++;
2118 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2122 lockdep_assert_held(&guc->submission_state.lock);
2123 GEM_BUG_ON(intel_context_is_child(ce));
2125 ret = new_guc_id(guc, ce);
2126 if (unlikely(ret < 0)) {
2127 if (intel_context_is_parent(ce))
2130 ret = steal_guc_id(guc, ce);
2135 if (intel_context_is_parent(ce)) {
2136 struct intel_context *child;
2139 for_each_child(ce, child)
2140 child->guc_id.id = ce->guc_id.id + i++;
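/*
 * Example (illustrative only): a parent with three children allocates a
 * contiguous block of order_base_2(4) = 4 ids from the multi-LRC bitmap in
 * new_guc_id(), say N..N+3; the loop above then hands N+1, N+2 and N+3 to
 * the children while the parent keeps N. Single-LRC contexts instead come
 * from the ida range above NUMBER_MULTI_LRC_GUC_ID().
 */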
2146 #define PIN_GUC_ID_TRIES 4
2147 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2150 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2152 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2155 spin_lock_irqsave(&guc->submission_state.lock, flags);
2157 might_lock(&ce->guc_state.lock);
2159 if (context_guc_id_invalid(ce)) {
2160 ret = assign_guc_id(guc, ce);
2163 ret = 1; /* Indicates newly assigned guc_id */
2165 if (!list_empty(&ce->guc_id.link))
2166 list_del_init(&ce->guc_id.link);
2167 atomic_inc(&ce->guc_id.ref);
2170 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2173 * -EAGAIN indicates no guc_ids are available; let's retire any
2174 * outstanding requests to see if that frees up a guc_id. If the first
2175 * retire didn't help, insert a sleep with the timeslice duration before
2176 * attempting to retire more requests. Double the sleep period each
2177 * subsequent pass before finally giving up. The sleep period has a
2178 * maximum of 100ms and a minimum of 1ms.
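 *
 * Illustrative timeline, assuming a timeslice of T ms (hypothetical value,
 * only to show the back-off shape): attempt #1 fails -> retire, no sleep ->
 * attempt #2 fails -> sleep ~clamp(T, 1, 100) ms, retire -> attempt #3
 * fails -> sleep ~clamp(2 * T, 1, 100) ms, retire -> attempt #4 fails ->
 * give up and return -EAGAIN.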
2180 if (ret == -EAGAIN && --tries) {
2181 if (PIN_GUC_ID_TRIES - tries > 1) {
2182 unsigned int timeslice_shifted =
2183 ce->engine->props.timeslice_duration_ms <<
2184 (PIN_GUC_ID_TRIES - tries - 2);
2185 unsigned int max = min_t(unsigned int, 100,
2188 msleep(max_t(unsigned int, max, 1));
2190 intel_gt_retire_requests(guc_to_gt(guc));
2197 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2199 unsigned long flags;
2201 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2202 GEM_BUG_ON(intel_context_is_child(ce));
2204 if (unlikely(context_guc_id_invalid(ce) ||
2205 intel_context_is_parent(ce)))
2208 spin_lock_irqsave(&guc->submission_state.lock, flags);
2209 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2210 !atomic_read(&ce->guc_id.ref))
2211 list_add_tail(&ce->guc_id.link,
2212 &guc->submission_state.guc_id_list);
2213 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2216 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2217 struct intel_context *ce,
2222 struct intel_context *child;
2223 u32 action[4 + MAX_ENGINE_INSTANCE];
2226 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2228 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2229 action[len++] = guc_id;
2230 action[len++] = ce->parallel.number_children + 1;
2231 action[len++] = offset;
2232 for_each_child(ce, child) {
2233 offset += sizeof(struct guc_lrc_desc_v69);
2234 action[len++] = offset;
2237 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2240 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2241 struct intel_context *ce,
2242 struct guc_ctxt_registration_info *info,
2245 struct intel_context *child;
2246 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2250 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2252 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2253 action[len++] = info->flags;
2254 action[len++] = info->context_idx;
2255 action[len++] = info->engine_class;
2256 action[len++] = info->engine_submit_mask;
2257 action[len++] = info->wq_desc_lo;
2258 action[len++] = info->wq_desc_hi;
2259 action[len++] = info->wq_base_lo;
2260 action[len++] = info->wq_base_hi;
2261 action[len++] = info->wq_size;
2262 action[len++] = ce->parallel.number_children + 1;
2263 action[len++] = info->hwlrca_lo;
2264 action[len++] = info->hwlrca_hi;
2266 next_id = info->context_idx + 1;
2267 for_each_child(ce, child) {
2268 GEM_BUG_ON(next_id++ != child->guc_id.id);
2271 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2272 * only supports 32 bit currently.
2274 action[len++] = lower_32_bits(child->lrc.lrca);
2275 action[len++] = upper_32_bits(child->lrc.lrca);
2278 GEM_BUG_ON(len > ARRAY_SIZE(action));
2280 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2283 static int __guc_action_register_context_v69(struct intel_guc *guc,
2289 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2294 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2298 static int __guc_action_register_context_v70(struct intel_guc *guc,
2299 struct guc_ctxt_registration_info *info,
2303 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2307 info->engine_submit_mask,
2317 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2321 static void prepare_context_registration_info_v69(struct intel_context *ce);
2322 static void prepare_context_registration_info_v70(struct intel_context *ce,
2323 struct guc_ctxt_registration_info *info);
2326 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2328 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2329 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2331 prepare_context_registration_info_v69(ce);
2333 if (intel_context_is_parent(ce))
2334 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2337 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2342 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2344 struct guc_ctxt_registration_info info;
2346 prepare_context_registration_info_v70(ce, &info);
2348 if (intel_context_is_parent(ce))
2349 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2351 return __guc_action_register_context_v70(guc, &info, loop);
2354 static int register_context(struct intel_context *ce, bool loop)
2356 struct intel_guc *guc = ce_to_guc(ce);
2359 GEM_BUG_ON(intel_context_is_child(ce));
2360 trace_intel_context_register(ce);
2362 if (guc->fw.major_ver_found >= 70)
2363 ret = register_context_v70(guc, ce, loop);
2365 ret = register_context_v69(guc, ce, loop);
2368 unsigned long flags;
2370 spin_lock_irqsave(&ce->guc_state.lock, flags);
2371 set_context_registered(ce);
2372 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2374 if (guc->fw.major_ver_found >= 70)
2375 guc_context_policy_init_v70(ce, loop);
2381 static int __guc_action_deregister_context(struct intel_guc *guc,
2385 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2389 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2390 G2H_LEN_DW_DEREGISTER_CONTEXT,
2394 static int deregister_context(struct intel_context *ce, u32 guc_id)
2396 struct intel_guc *guc = ce_to_guc(ce);
2398 GEM_BUG_ON(intel_context_is_child(ce));
2399 trace_intel_context_deregister(ce);
2401 return __guc_action_deregister_context(guc, guc_id);
2404 static inline void clear_children_join_go_memory(struct intel_context *ce)
2406 struct parent_scratch *ps = __get_parent_scratch(ce);
2409 ps->go.semaphore = 0;
2410 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2411 ps->join[i].semaphore = 0;
2414 static inline u32 get_children_go_value(struct intel_context *ce)
2416 return __get_parent_scratch(ce)->go.semaphore;
2419 static inline u32 get_children_join_value(struct intel_context *ce,
2422 return __get_parent_scratch(ce)->join[child_index].semaphore;
2425 struct context_policy {
2427 struct guc_update_context_policy h2g;
2430 static u32 __guc_context_policy_action_size(struct context_policy *policy)
2432 size_t bytes = sizeof(policy->h2g.header) +
2433 (sizeof(policy->h2g.klv[0]) * policy->count);
2435 return bytes / sizeof(u32);
2438 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2440 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2441 policy->h2g.header.ctx_id = guc_id;
2445 #define MAKE_CONTEXT_POLICY_ADD(func, id) \
2446 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2448 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
2449 policy->h2g.klv[policy->count].kl = \
2450 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2451 FIELD_PREP(GUC_KLV_0_LEN, 1); \
2452 policy->h2g.klv[policy->count].value = data; \
2456 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2457 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2458 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2459 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2461 #undef MAKE_CONTEXT_POLICY_ADD
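/*
 * Rough sketch of what one of the helpers above expands to (illustration
 * only, slightly simplified); e.g. MAKE_CONTEXT_POLICY_ADD(priority,
 * SCHEDULING_PRIORITY) yields approximately:
 *
 *	static void __guc_context_policy_add_priority(struct context_policy *policy, u32 data)
 *	{
 *		policy->h2g.klv[policy->count].kl =
 *			FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY) |
 *			FIELD_PREP(GUC_KLV_0_LEN, 1);
 *		policy->h2g.klv[policy->count].value = data;
 *		policy->count++;
 *	}
 *
 * i.e. each call appends one key/length/value entry to the H2G payload and
 * bumps the KLV count consumed by __guc_context_policy_action_size().
 */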
2463 static int __guc_context_set_context_policies(struct intel_guc *guc,
2464 struct context_policy *policy,
2467 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2468 __guc_context_policy_action_size(policy),
2472 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
2474 struct intel_engine_cs *engine = ce->engine;
2475 struct intel_guc *guc = &engine->gt->uc.guc;
2476 struct context_policy policy;
2477 u32 execution_quantum;
2478 u32 preemption_timeout;
2479 bool missing = false;
2480 unsigned long flags;
2483 /* NB: For both of these, zero means disabled. */
2484 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2485 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2487 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2489 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2490 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2491 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2493 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2494 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2496 ret = __guc_context_set_context_policies(guc, &policy, loop);
2499 if (!missing && intel_context_is_parent(ce)) {
2500 struct intel_context *child;
2502 for_each_child(ce, child) {
2503 __guc_context_policy_start_klv(&policy, child->guc_id.id);
2505 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2506 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2508 child->guc_state.prio = ce->guc_state.prio;
2509 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2510 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2511 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2513 ret = __guc_context_set_context_policies(guc, &policy, loop);
2521 spin_lock_irqsave(&ce->guc_state.lock, flags);
2523 set_context_policy_required(ce);
2525 clr_context_policy_required(ce);
2526 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2531 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2532 struct guc_lrc_desc_v69 *desc)
2534 desc->policy_flags = 0;
2536 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2537 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2539 /* NB: For both of these, zero means disabled. */
2540 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2541 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2544 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2547 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2548 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2554 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2555 return GEN12_CTX_PRIORITY_NORMAL;
2556 case GUC_CLIENT_PRIORITY_NORMAL:
2557 return GEN12_CTX_PRIORITY_LOW;
2558 case GUC_CLIENT_PRIORITY_HIGH:
2559 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2560 return GEN12_CTX_PRIORITY_HIGH;
2564 static void prepare_context_registration_info_v69(struct intel_context *ce)
2566 struct intel_engine_cs *engine = ce->engine;
2567 struct intel_guc *guc = &engine->gt->uc.guc;
2568 u32 ctx_id = ce->guc_id.id;
2569 struct guc_lrc_desc_v69 *desc;
2570 struct intel_context *child;
2572 GEM_BUG_ON(!engine->mask);
2575 * Ensure the LRC + CT vmas are in the same region, as the write barrier
2576 * is done based on the CT vma region.
2578 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2579 i915_gem_object_is_lmem(ce->ring->vma->obj));
2581 desc = __get_lrc_desc_v69(guc, ctx_id);
2582 desc->engine_class = engine_class_to_guc_class(engine->class);
2583 desc->engine_submit_mask = engine->logical_mask;
2584 desc->hw_context_desc = ce->lrc.lrca;
2585 desc->priority = ce->guc_state.prio;
2586 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2587 guc_context_policy_init_v69(engine, desc);
2590 * If the context is a parent, we need to register a process descriptor
2591 * describing a work queue and register all child contexts.
2593 if (intel_context_is_parent(ce)) {
2594 struct guc_process_desc_v69 *pdesc;
2596 ce->parallel.guc.wqi_tail = 0;
2597 ce->parallel.guc.wqi_head = 0;
2599 desc->process_desc = i915_ggtt_offset(ce->state) +
2600 __get_parent_scratch_offset(ce);
2601 desc->wq_addr = i915_ggtt_offset(ce->state) +
2602 __get_wq_offset(ce);
2603 desc->wq_size = WQ_SIZE;
2605 pdesc = __get_process_desc_v69(ce);
2606 memset(pdesc, 0, sizeof(*(pdesc)));
2607 pdesc->stage_id = ce->guc_id.id;
2608 pdesc->wq_base_addr = desc->wq_addr;
2609 pdesc->wq_size_bytes = desc->wq_size;
2610 pdesc->wq_status = WQ_STATUS_ACTIVE;
2612 ce->parallel.guc.wq_head = &pdesc->head;
2613 ce->parallel.guc.wq_tail = &pdesc->tail;
2614 ce->parallel.guc.wq_status = &pdesc->wq_status;
2616 for_each_child(ce, child) {
2617 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2619 desc->engine_class =
2620 engine_class_to_guc_class(engine->class);
2621 desc->hw_context_desc = child->lrc.lrca;
2622 desc->priority = ce->guc_state.prio;
2623 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2624 guc_context_policy_init_v69(engine, desc);
2627 clear_children_join_go_memory(ce);
2631 static void prepare_context_registration_info_v70(struct intel_context *ce,
2632 struct guc_ctxt_registration_info *info)
2634 struct intel_engine_cs *engine = ce->engine;
2635 struct intel_guc *guc = &engine->gt->uc.guc;
2636 u32 ctx_id = ce->guc_id.id;
2638 GEM_BUG_ON(!engine->mask);
2641 * Ensure the LRC + CT vmas are in the same region, as the write barrier
2642 * is done based on the CT vma region.
2644 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2645 i915_gem_object_is_lmem(ce->ring->vma->obj));
2647 memset(info, 0, sizeof(*info));
2648 info->context_idx = ctx_id;
2649 info->engine_class = engine_class_to_guc_class(engine->class);
2650 info->engine_submit_mask = engine->logical_mask;
2652 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2653 * only supports 32 bit currently.
2655 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2656 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2657 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2658 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2659 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2662 * If the context is a parent, we need to register a process descriptor
2663 * describing a work queue and register all child contexts.
2665 if (intel_context_is_parent(ce)) {
2666 struct guc_sched_wq_desc *wq_desc;
2667 u64 wq_desc_offset, wq_base_offset;
2669 ce->parallel.guc.wqi_tail = 0;
2670 ce->parallel.guc.wqi_head = 0;
2672 wq_desc_offset = i915_ggtt_offset(ce->state) +
2673 __get_parent_scratch_offset(ce);
2674 wq_base_offset = i915_ggtt_offset(ce->state) +
2675 __get_wq_offset(ce);
2676 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2677 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2678 info->wq_base_lo = lower_32_bits(wq_base_offset);
2679 info->wq_base_hi = upper_32_bits(wq_base_offset);
2680 info->wq_size = WQ_SIZE;
2682 wq_desc = __get_wq_desc_v70(ce);
2683 memset(wq_desc, 0, sizeof(*wq_desc));
2684 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2686 ce->parallel.guc.wq_head = &wq_desc->head;
2687 ce->parallel.guc.wq_tail = &wq_desc->tail;
2688 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2690 clear_children_join_go_memory(ce);
2694 static int try_context_registration(struct intel_context *ce, bool loop)
2696 struct intel_engine_cs *engine = ce->engine;
2697 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2698 struct intel_guc *guc = &engine->gt->uc.guc;
2699 intel_wakeref_t wakeref;
2700 u32 ctx_id = ce->guc_id.id;
2701 bool context_registered;
2704 GEM_BUG_ON(!sched_state_is_init(ce));
2706 context_registered = ctx_id_mapped(guc, ctx_id);
2708 clr_ctx_id_mapping(guc, ctx_id);
2709 set_ctx_id_mapping(guc, ctx_id, ce);
2712 * The context_lookup xarray is used to determine if the hardware
2713 * context is currently registered. There are two cases in which it
2714 * could be registered: either the guc_id has been stolen from another
2715 * context, or the LRC descriptor address of this context has changed. In
2716 * either case the context needs to be deregistered with the GuC before
2717 * registering this context.
2719 if (context_registered) {
2721 unsigned long flags;
2723 trace_intel_context_steal_guc_id(ce);
2726 /* Seal race with Reset */
2727 spin_lock_irqsave(&ce->guc_state.lock, flags);
2728 disabled = submission_disabled(guc);
2729 if (likely(!disabled)) {
2730 set_context_wait_for_deregister_to_register(ce);
2731 intel_context_get(ce);
2733 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2734 if (unlikely(disabled)) {
2735 clr_ctx_id_mapping(guc, ctx_id);
2736 return 0; /* Will get registered later */
2740 * If stealing the guc_id, this ce has the same guc_id as the
2741 * context whose guc_id was stolen.
2743 with_intel_runtime_pm(runtime_pm, wakeref)
2744 ret = deregister_context(ce, ce->guc_id.id);
2745 if (unlikely(ret == -ENODEV))
2746 ret = 0; /* Will get registered later */
2748 with_intel_runtime_pm(runtime_pm, wakeref)
2749 ret = register_context(ce, loop);
2750 if (unlikely(ret == -EBUSY)) {
2751 clr_ctx_id_mapping(guc, ctx_id);
2752 } else if (unlikely(ret == -ENODEV)) {
2753 clr_ctx_id_mapping(guc, ctx_id);
2754 ret = 0; /* Will get registered later */
2761 static int __guc_context_pre_pin(struct intel_context *ce,
2762 struct intel_engine_cs *engine,
2763 struct i915_gem_ww_ctx *ww,
2766 return lrc_pre_pin(ce, engine, ww, vaddr);
2769 static int __guc_context_pin(struct intel_context *ce,
2770 struct intel_engine_cs *engine,
2773 if (i915_ggtt_offset(ce->state) !=
2774 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2775 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2778 * The GuC context gets pinned in guc_request_alloc(). See that function
2779 * for an explanation of why.
2782 return lrc_pin(ce, engine, vaddr);
2785 static int guc_context_pre_pin(struct intel_context *ce,
2786 struct i915_gem_ww_ctx *ww,
2789 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2792 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2794 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2796 if (likely(!ret && !intel_context_is_barrier(ce)))
2797 intel_engine_pm_get(ce->engine);
2802 static void guc_context_unpin(struct intel_context *ce)
2804 struct intel_guc *guc = ce_to_guc(ce);
2806 unpin_guc_id(guc, ce);
2809 if (likely(!intel_context_is_barrier(ce)))
2810 intel_engine_pm_put_async(ce->engine);
2813 static void guc_context_post_unpin(struct intel_context *ce)
2818 static void __guc_context_sched_enable(struct intel_guc *guc,
2819 struct intel_context *ce)
2822 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2827 trace_intel_context_sched_enable(ce);
2829 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2830 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2833 static void __guc_context_sched_disable(struct intel_guc *guc,
2834 struct intel_context *ce,
2838 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2839 guc_id, /* ce->guc_id.id not stable */
2843 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2845 GEM_BUG_ON(intel_context_is_child(ce));
2846 trace_intel_context_sched_disable(ce);
2848 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2849 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2852 static void guc_blocked_fence_complete(struct intel_context *ce)
2854 lockdep_assert_held(&ce->guc_state.lock);
2856 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2857 i915_sw_fence_complete(&ce->guc_state.blocked);
2860 static void guc_blocked_fence_reinit(struct intel_context *ce)
2862 lockdep_assert_held(&ce->guc_state.lock);
2863 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2866 * This fence is always complete unless a pending schedule disable is
2867 * outstanding. We arm the fence here and complete it when we receive
2868 * the pending schedule disable complete message.
2870 i915_sw_fence_fini(&ce->guc_state.blocked);
2871 i915_sw_fence_reinit(&ce->guc_state.blocked);
2872 i915_sw_fence_await(&ce->guc_state.blocked);
2873 i915_sw_fence_commit(&ce->guc_state.blocked);
2876 static u16 prep_context_pending_disable(struct intel_context *ce)
2878 lockdep_assert_held(&ce->guc_state.lock);
2880 set_context_pending_disable(ce);
2881 clr_context_enabled(ce);
2882 guc_blocked_fence_reinit(ce);
2883 intel_context_get(ce);
2885 return ce->guc_id.id;
2888 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2890 struct intel_guc *guc = ce_to_guc(ce);
2891 unsigned long flags;
2892 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2893 intel_wakeref_t wakeref;
2897 GEM_BUG_ON(intel_context_is_child(ce));
2899 spin_lock_irqsave(&ce->guc_state.lock, flags);
2901 incr_context_blocked(ce);
2903 enabled = context_enabled(ce);
2904 if (unlikely(!enabled || submission_disabled(guc))) {
2906 clr_context_enabled(ce);
2907 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2908 return &ce->guc_state.blocked;
2912 * We add +2 here as the schedule disable complete CTB handler calls
2913 * intel_context_sched_disable_unpin (-2 to pin_count).
2915 atomic_add(2, &ce->pin_count);
2917 guc_id = prep_context_pending_disable(ce);
2919 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2921 with_intel_runtime_pm(runtime_pm, wakeref)
2922 __guc_context_sched_disable(guc, ce, guc_id);
2924 return &ce->guc_state.blocked;
2927 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2928 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2929 #define SCHED_STATE_NO_UNBLOCK \
2930 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2931 SCHED_STATE_PENDING_DISABLE | \
2934 static bool context_cant_unblock(struct intel_context *ce)
2936 lockdep_assert_held(&ce->guc_state.lock);
2938 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2939 context_guc_id_invalid(ce) ||
2940 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2941 !intel_context_is_pinned(ce);
2944 static void guc_context_unblock(struct intel_context *ce)
2946 struct intel_guc *guc = ce_to_guc(ce);
2947 unsigned long flags;
2948 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2949 intel_wakeref_t wakeref;
2952 GEM_BUG_ON(context_enabled(ce));
2953 GEM_BUG_ON(intel_context_is_child(ce));
2955 spin_lock_irqsave(&ce->guc_state.lock, flags);
2957 if (unlikely(submission_disabled(guc) ||
2958 context_cant_unblock(ce))) {
2962 set_context_pending_enable(ce);
2963 set_context_enabled(ce);
2964 intel_context_get(ce);
2967 decr_context_blocked(ce);
2969 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2972 with_intel_runtime_pm(runtime_pm, wakeref)
2973 __guc_context_sched_enable(guc, ce);
2977 static void guc_context_cancel_request(struct intel_context *ce,
2978 struct i915_request *rq)
2980 struct intel_context *block_context =
2981 request_to_scheduling_context(rq);
2983 if (i915_sw_fence_signaled(&rq->submit)) {
2984 struct i915_sw_fence *fence;
2986 intel_context_get(ce);
2987 fence = guc_context_block(block_context);
2988 i915_sw_fence_wait(fence);
2989 if (!i915_request_completed(rq)) {
2990 __i915_request_skip(rq);
2991 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2995 guc_context_unblock(block_context);
2996 intel_context_put(ce);
3000 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
3002 u32 preemption_timeout)
3004 if (guc->fw.major_ver_found >= 70) {
3005 struct context_policy policy;
3007 __guc_context_policy_start_klv(&policy, guc_id);
3008 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
3009 __guc_context_set_context_policies(guc, &policy, true);
3012 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
3017 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3021 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
3023 struct intel_guc *guc = ce_to_guc(ce);
3024 struct intel_runtime_pm *runtime_pm =
3025 &ce->engine->gt->i915->runtime_pm;
3026 intel_wakeref_t wakeref;
3027 unsigned long flags;
3029 GEM_BUG_ON(intel_context_is_child(ce));
3031 guc_flush_submissions(guc);
3033 spin_lock_irqsave(&ce->guc_state.lock, flags);
3034 set_context_banned(ce);
3036 if (submission_disabled(guc) ||
3037 (!context_enabled(ce) && !context_pending_disable(ce))) {
3038 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3040 guc_cancel_context_requests(ce);
3041 intel_engine_signal_breadcrumbs(ce->engine);
3042 } else if (!context_pending_disable(ce)) {
3046 * We add +2 here as the schedule disable complete CTB handler
3047 * calls intel_context_sched_disable_unpin (-2 to pin_count).
3049 atomic_add(2, &ce->pin_count);
3051 guc_id = prep_context_pending_disable(ce);
3052 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3055 * In addition to disabling scheduling, set the preemption
3056 * timeout to the minimum value (1 us) so the banned context
3057 * gets kicked off the HW ASAP.
3059 with_intel_runtime_pm(runtime_pm, wakeref) {
3060 __guc_context_set_preemption_timeout(guc, guc_id, 1);
3061 __guc_context_sched_disable(guc, ce, guc_id);
3064 if (!context_guc_id_invalid(ce))
3065 with_intel_runtime_pm(runtime_pm, wakeref)
3066 __guc_context_set_preemption_timeout(guc,
3069 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3073 static void guc_context_sched_disable(struct intel_context *ce)
3075 struct intel_guc *guc = ce_to_guc(ce);
3076 unsigned long flags;
3077 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3078 intel_wakeref_t wakeref;
3081 GEM_BUG_ON(intel_context_is_child(ce));
3083 spin_lock_irqsave(&ce->guc_state.lock, flags);
3086 * We have to check if the context has been disabled by another thread,
3087 * check if submission has been disabled to seal a race with reset, and
3088 * finally check if any more requests have been committed to the
3089 * context, ensuring that a request doesn't slip through the
3090 * 'context_pending_disable' fence.
3092 if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
3093 context_has_committed_requests(ce))) {
3094 clr_context_enabled(ce);
3095 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3098 guc_id = prep_context_pending_disable(ce);
3100 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3102 with_intel_runtime_pm(runtime_pm, wakeref)
3103 __guc_context_sched_disable(guc, ce, guc_id);
3107 intel_context_sched_disable_unpin(ce);
3110 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3112 struct intel_guc *guc = ce_to_guc(ce);
3113 struct intel_gt *gt = guc_to_gt(guc);
3114 unsigned long flags;
3117 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3118 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3119 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3120 GEM_BUG_ON(context_enabled(ce));
3122 /* Seal race with Reset */
3123 spin_lock_irqsave(&ce->guc_state.lock, flags);
3124 disabled = submission_disabled(guc);
3125 if (likely(!disabled)) {
3126 __intel_gt_pm_get(gt);
3127 set_context_destroyed(ce);
3128 clr_context_registered(ce);
3130 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3131 if (unlikely(disabled)) {
3132 release_guc_id(guc, ce);
3133 __guc_context_destroy(ce);
3137 deregister_context(ce, ce->guc_id.id);
3140 static void __guc_context_destroy(struct intel_context *ce)
3142 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3143 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3144 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3145 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3146 GEM_BUG_ON(ce->guc_state.number_committed_requests);
3149 intel_context_fini(ce);
3151 if (intel_engine_is_virtual(ce->engine)) {
3152 struct guc_virtual_engine *ve =
3153 container_of(ce, typeof(*ve), context);
3155 if (ve->base.breadcrumbs)
3156 intel_breadcrumbs_put(ve->base.breadcrumbs);
3160 intel_context_free(ce);
3164 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3166 struct intel_context *ce;
3167 unsigned long flags;
3169 GEM_BUG_ON(!submission_disabled(guc) &&
3170 guc_submission_initialized(guc));
3172 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3173 spin_lock_irqsave(&guc->submission_state.lock, flags);
3174 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3175 struct intel_context,
3178 list_del_init(&ce->destroyed_link);
3179 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3184 release_guc_id(guc, ce);
3185 __guc_context_destroy(ce);
3189 static void deregister_destroyed_contexts(struct intel_guc *guc)
3191 struct intel_context *ce;
3192 unsigned long flags;
3194 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3195 spin_lock_irqsave(&guc->submission_state.lock, flags);
3196 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3197 struct intel_context,
3200 list_del_init(&ce->destroyed_link);
3201 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3206 guc_lrc_desc_unpin(ce);
3210 static void destroyed_worker_func(struct work_struct *w)
3212 struct intel_guc *guc = container_of(w, struct intel_guc,
3213 submission_state.destroyed_worker);
3214 struct intel_gt *gt = guc_to_gt(guc);
3217 with_intel_gt_pm(gt, tmp)
3218 deregister_destroyed_contexts(guc);
3221 static void guc_context_destroy(struct kref *kref)
3223 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3224 struct intel_guc *guc = ce_to_guc(ce);
3225 unsigned long flags;
3229 * If the guc_id is invalid this context has been stolen and we can free
3230 * it immediately. It can also be freed immediately if the context is not
3231 * registered with the GuC or the GuC is in the middle of a reset.
3233 spin_lock_irqsave(&guc->submission_state.lock, flags);
3234 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3235 !ctx_id_mapped(guc, ce->guc_id.id);
3236 if (likely(!destroy)) {
3237 if (!list_empty(&ce->guc_id.link))
3238 list_del_init(&ce->guc_id.link);
3239 list_add_tail(&ce->destroyed_link,
3240 &guc->submission_state.destroyed_contexts);
3242 __release_guc_id(guc, ce);
3244 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3245 if (unlikely(destroy)) {
3246 __guc_context_destroy(ce);
3251 * We use a worker to issue the H2G to deregister the context, as we can
3252 * take the GT PM for the first time, which isn't allowed from an atomic context.
3255 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3258 static int guc_context_alloc(struct intel_context *ce)
3260 return lrc_alloc(ce, ce->engine);
3263 static void __guc_context_set_prio(struct intel_guc *guc,
3264 struct intel_context *ce)
3266 if (guc->fw.major_ver_found >= 70) {
3267 struct context_policy policy;
3269 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3270 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3271 __guc_context_set_context_policies(guc, &policy, true);
3274 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3279 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3283 static void guc_context_set_prio(struct intel_guc *guc,
3284 struct intel_context *ce,
3287 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
3288 prio > GUC_CLIENT_PRIORITY_NORMAL);
3289 lockdep_assert_held(&ce->guc_state.lock);
3291 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3292 !context_registered(ce)) {
3293 ce->guc_state.prio = prio;
3297 ce->guc_state.prio = prio;
3298 __guc_context_set_prio(guc, ce);
3300 trace_intel_context_set_prio(ce);
3303 static inline u8 map_i915_prio_to_guc_prio(int prio)
3305 if (prio == I915_PRIORITY_NORMAL)
3306 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3307 else if (prio < I915_PRIORITY_NORMAL)
3308 return GUC_CLIENT_PRIORITY_NORMAL;
3309 else if (prio < I915_PRIORITY_DISPLAY)
3310 return GUC_CLIENT_PRIORITY_HIGH;
3312 return GUC_CLIENT_PRIORITY_KMD_HIGH;
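/*
 * Rough end-to-end view of map_i915_prio_to_guc_prio() above together with
 * map_guc_prio_to_lrc_desc_prio() further up (i915 prio -> GuC client prio
 * -> LRC descriptor prio):
 *
 *	prio <  I915_PRIORITY_NORMAL  -> GUC_CLIENT_PRIORITY_NORMAL     -> GEN12_CTX_PRIORITY_LOW
 *	prio == I915_PRIORITY_NORMAL  -> GUC_CLIENT_PRIORITY_KMD_NORMAL -> GEN12_CTX_PRIORITY_NORMAL
 *	prio <  I915_PRIORITY_DISPLAY -> GUC_CLIENT_PRIORITY_HIGH       -> GEN12_CTX_PRIORITY_HIGH
 *	otherwise                     -> GUC_CLIENT_PRIORITY_KMD_HIGH   -> GEN12_CTX_PRIORITY_HIGH
 */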
3315 static inline void add_context_inflight_prio(struct intel_context *ce,
3318 lockdep_assert_held(&ce->guc_state.lock);
3319 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3321 ++ce->guc_state.prio_count[guc_prio];
3323 /* Overflow protection */
3324 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3327 static inline void sub_context_inflight_prio(struct intel_context *ce,
3330 lockdep_assert_held(&ce->guc_state.lock);
3331 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3333 /* Underflow protection */
3334 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3336 --ce->guc_state.prio_count[guc_prio];
3339 static inline void update_context_prio(struct intel_context *ce)
3341 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3344 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3345 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3347 lockdep_assert_held(&ce->guc_state.lock);
3349 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3350 if (ce->guc_state.prio_count[i]) {
3351 guc_context_set_prio(guc, ce, i);
3357 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3359 /* Lower value is higher priority */
3360 return new_guc_prio < old_guc_prio;
3363 static void add_to_context(struct i915_request *rq)
3365 struct intel_context *ce = request_to_scheduling_context(rq);
3366 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3368 GEM_BUG_ON(intel_context_is_child(ce));
3369 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3371 spin_lock(&ce->guc_state.lock);
3372 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3374 if (rq->guc_prio == GUC_PRIO_INIT) {
3375 rq->guc_prio = new_guc_prio;
3376 add_context_inflight_prio(ce, rq->guc_prio);
3377 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3378 sub_context_inflight_prio(ce, rq->guc_prio);
3379 rq->guc_prio = new_guc_prio;
3380 add_context_inflight_prio(ce, rq->guc_prio);
3382 update_context_prio(ce);
3384 spin_unlock(&ce->guc_state.lock);
3387 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3389 lockdep_assert_held(&ce->guc_state.lock);
3391 if (rq->guc_prio != GUC_PRIO_INIT &&
3392 rq->guc_prio != GUC_PRIO_FINI) {
3393 sub_context_inflight_prio(ce, rq->guc_prio);
3394 update_context_prio(ce);
3396 rq->guc_prio = GUC_PRIO_FINI;
3399 static void remove_from_context(struct i915_request *rq)
3401 struct intel_context *ce = request_to_scheduling_context(rq);
3403 GEM_BUG_ON(intel_context_is_child(ce));
3405 spin_lock_irq(&ce->guc_state.lock);
3407 list_del_init(&rq->sched.link);
3408 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3410 /* Prevent further __await_execution() registering a cb, then flush */
3411 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3413 guc_prio_fini(rq, ce);
3415 decr_context_committed_requests(ce);
3417 spin_unlock_irq(&ce->guc_state.lock);
3419 atomic_dec(&ce->guc_id.ref);
3420 i915_request_notify_execute_cb_imm(rq);
3423 static const struct intel_context_ops guc_context_ops = {
3424 .alloc = guc_context_alloc,
3426 .pre_pin = guc_context_pre_pin,
3427 .pin = guc_context_pin,
3428 .unpin = guc_context_unpin,
3429 .post_unpin = guc_context_post_unpin,
3431 .ban = guc_context_ban,
3433 .cancel_request = guc_context_cancel_request,
3435 .enter = intel_context_enter_engine,
3436 .exit = intel_context_exit_engine,
3438 .sched_disable = guc_context_sched_disable,
3441 .destroy = guc_context_destroy,
3443 .create_virtual = guc_create_virtual,
3444 .create_parallel = guc_create_parallel,
3447 static void submit_work_cb(struct irq_work *wrk)
3449 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
3451 might_lock(&rq->engine->sched_engine->lock);
3452 i915_sw_fence_complete(&rq->submit);
3455 static void __guc_signal_context_fence(struct intel_context *ce)
3457 struct i915_request *rq, *rn;
3459 lockdep_assert_held(&ce->guc_state.lock);
3461 if (!list_empty(&ce->guc_state.fences))
3462 trace_intel_context_fence_release(ce);
3465 * Use an IRQ to ensure locking order of sched_engine->lock ->
3466 * ce->guc_state.lock is preserved.
3468 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3470 list_del(&rq->guc_fence_link);
3471 irq_work_queue(&rq->submit_work);
3474 INIT_LIST_HEAD(&ce->guc_state.fences);
3477 static void guc_signal_context_fence(struct intel_context *ce)
3479 unsigned long flags;
3481 GEM_BUG_ON(intel_context_is_child(ce));
3483 spin_lock_irqsave(&ce->guc_state.lock, flags);
3484 clr_context_wait_for_deregister_to_register(ce);
3485 __guc_signal_context_fence(ce);
3486 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3489 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
3491 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
3492 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3493 !submission_disabled(ce_to_guc(ce));
3496 static void guc_context_init(struct intel_context *ce)
3498 const struct i915_gem_context *ctx;
3499 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3502 ctx = rcu_dereference(ce->gem_context);
3504 prio = ctx->sched.priority;
3507 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3508 set_bit(CONTEXT_GUC_INIT, &ce->flags);
3511 static int guc_request_alloc(struct i915_request *rq)
3513 struct intel_context *ce = request_to_scheduling_context(rq);
3514 struct intel_guc *guc = ce_to_guc(ce);
3515 unsigned long flags;
3518 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
3521 * Flush enough space to reduce the likelihood of waiting after
3522 * we start building the request - in which case we will just
3523 * have to repeat work.
3525 rq->reserved_space += GUC_REQUEST_SIZE;
3528 * Note that after this point, we have committed to using
3529 * this request as it is being used to both track the
3530 * state of engine initialisation and liveness of the
3531 * golden renderstate above. Think twice before you try
3532 * to cancel/unwind this request now.
3535 /* Unconditionally invalidate GPU caches and TLBs. */
3536 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
3540 rq->reserved_space -= GUC_REQUEST_SIZE;
3542 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
3543 guc_context_init(ce);
3546 * Call pin_guc_id here rather than in the pinning step as with
2547 * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the
2548 * guc_id and creating horrible race conditions. This is especially bad
2549 * when guc_ids are being stolen due to oversubscription. By the time
3550 * this function is reached, it is guaranteed that the guc_id will be
2551 * persistent until the generated request is retired, thus sealing these
2552 * race conditions. It is still safe to fail here if guc_ids are
3553 * exhausted and return -EAGAIN to the user indicating that they can try
3554 * again in the future.
3556 * There is no need for a lock here as the timeline mutex ensures at
3557 * most one context can be executing this code path at once. The
3558 * guc_id_ref is incremented once for every request in flight and
3559 * decremented on each retire. When it is zero, a lock around the
3560 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3562 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3565 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3566 if (unlikely(ret < 0))
3568 if (context_needs_register(ce, !!ret)) {
3569 ret = try_context_registration(ce, true);
3570 if (unlikely(ret)) { /* unwind */
3571 if (ret == -EPIPE) {
3572 disable_submission(guc);
3573 goto out; /* GPU will be reset */
3575 atomic_dec(&ce->guc_id.ref);
3576 unpin_guc_id(guc, ce);
3581 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3585 * We block all requests on this context if a G2H is pending for a
3586 * schedule disable or context deregistration as the GuC will fail a
3587 * schedule enable or context registration if either G2H is pending
3588 * respectively. Once a G2H returns, the fence that is blocking these
3589 * requests is released (see guc_signal_context_fence).
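 *
 * Sketch of the flow implemented below: the request takes an extra await on
 * its own submit fence and is parked on ce->guc_state.fences; when the
 * relevant G2H arrives, guc_signal_context_fence() queues submit_work_cb()
 * via irq_work, which completes the submit fence and lets the request
 * proceed.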
3591 spin_lock_irqsave(&ce->guc_state.lock, flags);
3592 if (context_wait_for_deregister_to_register(ce) ||
3593 context_pending_disable(ce)) {
3594 init_irq_work(&rq->submit_work, submit_work_cb);
3595 i915_sw_fence_await(&rq->submit);
3597 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3599 incr_context_committed_requests(ce);
3600 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3605 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3606 struct i915_gem_ww_ctx *ww,
3609 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3611 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3614 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3616 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3617 int ret = __guc_context_pin(ce, engine, vaddr);
3618 intel_engine_mask_t tmp, mask = ce->engine->mask;
3621 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3622 intel_engine_pm_get(engine);
3627 static void guc_virtual_context_unpin(struct intel_context *ce)
3629 intel_engine_mask_t tmp, mask = ce->engine->mask;
3630 struct intel_engine_cs *engine;
3631 struct intel_guc *guc = ce_to_guc(ce);
3633 GEM_BUG_ON(context_enabled(ce));
3634 GEM_BUG_ON(intel_context_is_barrier(ce));
3636 unpin_guc_id(guc, ce);
3639 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3640 intel_engine_pm_put_async(engine);
3643 static void guc_virtual_context_enter(struct intel_context *ce)
3645 intel_engine_mask_t tmp, mask = ce->engine->mask;
3646 struct intel_engine_cs *engine;
3648 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3649 intel_engine_pm_get(engine);
3651 intel_timeline_enter(ce->timeline);
3654 static void guc_virtual_context_exit(struct intel_context *ce)
3656 intel_engine_mask_t tmp, mask = ce->engine->mask;
3657 struct intel_engine_cs *engine;
3659 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3660 intel_engine_pm_put(engine);
3662 intel_timeline_exit(ce->timeline);
3665 static int guc_virtual_context_alloc(struct intel_context *ce)
3667 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3669 return lrc_alloc(ce, engine);
3672 static const struct intel_context_ops virtual_guc_context_ops = {
3673 .alloc = guc_virtual_context_alloc,
3675 .pre_pin = guc_virtual_context_pre_pin,
3676 .pin = guc_virtual_context_pin,
3677 .unpin = guc_virtual_context_unpin,
3678 .post_unpin = guc_context_post_unpin,
3680 .ban = guc_context_ban,
3682 .cancel_request = guc_context_cancel_request,
3684 .enter = guc_virtual_context_enter,
3685 .exit = guc_virtual_context_exit,
3687 .sched_disable = guc_context_sched_disable,
3689 .destroy = guc_context_destroy,
3691 .get_sibling = guc_virtual_get_sibling,
3694 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3696 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3697 struct intel_guc *guc = ce_to_guc(ce);
3700 GEM_BUG_ON(!intel_context_is_parent(ce));
3701 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3703 ret = pin_guc_id(guc, ce);
3704 if (unlikely(ret < 0))
3707 return __guc_context_pin(ce, engine, vaddr);
3710 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3712 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3714 GEM_BUG_ON(!intel_context_is_child(ce));
3715 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3717 __intel_context_pin(ce->parallel.parent);
3718 return __guc_context_pin(ce, engine, vaddr);
3721 static void guc_parent_context_unpin(struct intel_context *ce)
3723 struct intel_guc *guc = ce_to_guc(ce);
3725 GEM_BUG_ON(context_enabled(ce));
3726 GEM_BUG_ON(intel_context_is_barrier(ce));
3727 GEM_BUG_ON(!intel_context_is_parent(ce));
3728 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3730 unpin_guc_id(guc, ce);
3734 static void guc_child_context_unpin(struct intel_context *ce)
3736 GEM_BUG_ON(context_enabled(ce));
3737 GEM_BUG_ON(intel_context_is_barrier(ce));
3738 GEM_BUG_ON(!intel_context_is_child(ce));
3739 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3744 static void guc_child_context_post_unpin(struct intel_context *ce)
3746 GEM_BUG_ON(!intel_context_is_child(ce));
3747 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
3748 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3751 intel_context_unpin(ce->parallel.parent);
3754 static void guc_child_context_destroy(struct kref *kref)
3756 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3758 __guc_context_destroy(ce);
3761 static const struct intel_context_ops virtual_parent_context_ops = {
3762 .alloc = guc_virtual_context_alloc,
3764 .pre_pin = guc_context_pre_pin,
3765 .pin = guc_parent_context_pin,
3766 .unpin = guc_parent_context_unpin,
3767 .post_unpin = guc_context_post_unpin,
3769 .ban = guc_context_ban,
3771 .cancel_request = guc_context_cancel_request,
3773 .enter = guc_virtual_context_enter,
3774 .exit = guc_virtual_context_exit,
3776 .sched_disable = guc_context_sched_disable,
3778 .destroy = guc_context_destroy,
3780 .get_sibling = guc_virtual_get_sibling,
3783 static const struct intel_context_ops virtual_child_context_ops = {
3784 .alloc = guc_virtual_context_alloc,
3786 .pre_pin = guc_context_pre_pin,
3787 .pin = guc_child_context_pin,
3788 .unpin = guc_child_context_unpin,
3789 .post_unpin = guc_child_context_post_unpin,
3791 .cancel_request = guc_context_cancel_request,
3793 .enter = guc_virtual_context_enter,
3794 .exit = guc_virtual_context_exit,
3796 .destroy = guc_child_context_destroy,
3798 .get_sibling = guc_virtual_get_sibling,
3802 * The below override of the breadcrumbs is enabled when the user configures a
3803 * context for parallel submission (multi-lrc, parent-child).
3805 * The overridden breadcrumbs implement an algorithm which allows the GuC to
3806 * safely preempt all the hw contexts configured for parallel submission
3807 * between each BB. The contract between the i915 and the GuC is that if the
3808 * parent context can be preempted, all the children can be preempted, and
3809 * the GuC will always try to preempt the parent before the children. A
3810 * handshake between the parent / children breadcrumbs ensures the i915 holds
3811 * up its end of the deal, creating a window to preempt between each set of BBs.
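 *
 * Very roughly (the exact command sequences live in the emit_* helpers
 * declared below): each child writes its join semaphore in the parent's
 * scratch page and then polls the shared go semaphore, while the parent
 * waits for every child's join before writing go, so all contexts reach a
 * preemptible point together.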
3813 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3814 u64 offset, u32 len,
3815 const unsigned int flags);
3816 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3817 u64 offset, u32 len,
3818 const unsigned int flags);
3820 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3823 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3826 static struct intel_context *
3827 guc_create_parallel(struct intel_engine_cs **engines,
3828 unsigned int num_siblings,
3831 struct intel_engine_cs **siblings = NULL;
3832 struct intel_context *parent = NULL, *ce, *err;
3835 siblings = kmalloc_array(num_siblings,
3839 return ERR_PTR(-ENOMEM);
3841 for (i = 0; i < width; ++i) {
3842 for (j = 0; j < num_siblings; ++j)
3843 siblings[j] = engines[i * num_siblings + j];
3845 ce = intel_engine_create_virtual(siblings, num_siblings,
3854 parent->ops = &virtual_parent_context_ops;
3856 ce->ops = &virtual_child_context_ops;
3857 intel_context_bind_parent_child(parent, ce);
3861 parent->parallel.fence_context = dma_fence_context_alloc(1);
3863 parent->engine->emit_bb_start =
3864 emit_bb_start_parent_no_preempt_mid_batch;
3865 parent->engine->emit_fini_breadcrumb =
3866 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3867 parent->engine->emit_fini_breadcrumb_dw =
3868 12 + 4 * parent->parallel.number_children;
3869 for_each_child(parent, ce) {
3870 ce->engine->emit_bb_start =
3871 emit_bb_start_child_no_preempt_mid_batch;
3872 ce->engine->emit_fini_breadcrumb =
3873 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3874 ce->engine->emit_fini_breadcrumb_dw = 16;
3882 intel_context_put(parent);
3888 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3890 struct intel_engine_cs *sibling;
3891 intel_engine_mask_t tmp, mask = b->engine_mask;
3892 bool result = false;
3894 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3895 result |= intel_engine_irq_enable(sibling);
3901 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3903 struct intel_engine_cs *sibling;
3904 intel_engine_mask_t tmp, mask = b->engine_mask;
3906 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3907 intel_engine_irq_disable(sibling);
3910 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3915 * In GuC submission mode we do not know which physical engine a request
3916 * will be scheduled on; this creates a problem because the breadcrumb
3917 * interrupt is per physical engine. To work around this we attach
3918 * requests and direct all breadcrumb interrupts to the first instance
3919 * of an engine per class. In addition all breadcrumb interrupts are
3920 * enabled / disabled across an engine class in unison.
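 *
 * For example (illustrative only): on a GT with four video engines,
 * vcs1-vcs3 drop their own breadcrumbs object and take a reference on
 * vcs0's, b->engine_mask accumulates all four engines' bits, and the
 * guc_irq_enable/disable_breadcrumbs() hooks above then toggle the user
 * interrupt on every engine instance in the class.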
3922 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
3923 struct intel_engine_cs *sibling =
3924 engine->gt->engine_class[engine->class][i];
3927 if (engine->breadcrumbs != sibling->breadcrumbs) {
3928 intel_breadcrumbs_put(engine->breadcrumbs);
3929 engine->breadcrumbs =
3930 intel_breadcrumbs_get(sibling->breadcrumbs);
3936 if (engine->breadcrumbs) {
3937 engine->breadcrumbs->engine_mask |= engine->mask;
3938 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3939 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3943 static void guc_bump_inflight_request_prio(struct i915_request *rq,
3946 struct intel_context *ce = request_to_scheduling_context(rq);
3947 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3949 /* Short circuit function */
3950 if (prio < I915_PRIORITY_NORMAL ||
3951 rq->guc_prio == GUC_PRIO_FINI ||
3952 (rq->guc_prio != GUC_PRIO_INIT &&
3953 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
3956 spin_lock(&ce->guc_state.lock);
3957 if (rq->guc_prio != GUC_PRIO_FINI) {
3958 if (rq->guc_prio != GUC_PRIO_INIT)
3959 sub_context_inflight_prio(ce, rq->guc_prio);
3960 rq->guc_prio = new_guc_prio;
3961 add_context_inflight_prio(ce, rq->guc_prio);
3962 update_context_prio(ce);
3964 spin_unlock(&ce->guc_state.lock);
3967 static void guc_retire_inflight_request_prio(struct i915_request *rq)
3969 struct intel_context *ce = request_to_scheduling_context(rq);
3971 spin_lock(&ce->guc_state.lock);
3972 guc_prio_fini(rq, ce);
3973 spin_unlock(&ce->guc_state.lock);
3976 static void sanitize_hwsp(struct intel_engine_cs *engine)
3978 struct intel_timeline *tl;
3980 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
3981 intel_timeline_reset_seqno(tl);
3984 static void guc_sanitize(struct intel_engine_cs *engine)
3987 * Poison residual state on resume, in case the suspend didn't!
3989 * We have to assume that across suspend/resume (or other loss
3990 * of control) the contents of our pinned buffers have been
3991 * lost, replaced by garbage. Since this doesn't always happen,
3992 * let's poison such state so that we more quickly spot when
3993 * we falsely assume it has been preserved.
3995 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3996 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3999 * The kernel_context HWSP is stored in the status_page. As above,
4000 * that may be lost on resume/initialisation, and so we need to
4001 * reset the value in the HWSP.
4003 sanitize_hwsp(engine);
4005 /* And scrub the dirty cachelines for the HWSP */
4006 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
4008 intel_engine_reset_pinned_contexts(engine);
4011 static void setup_hwsp(struct intel_engine_cs *engine)
4013 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4015 ENGINE_WRITE_FW(engine,
4017 i915_ggtt_offset(engine->status_page.vma));
4020 static void start_engine(struct intel_engine_cs *engine)
4022 ENGINE_WRITE_FW(engine,
4024 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
4026 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4027 ENGINE_POSTING_READ(engine, RING_MI_MODE);
4030 static int guc_resume(struct intel_engine_cs *engine)
4032 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4034 intel_mocs_init_engine(engine);
4036 intel_breadcrumbs_reset(engine->breadcrumbs);
4039 start_engine(engine);
4041 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
4042 xehp_enable_ccs_engines(engine);
4047 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
4049 return !sched_engine->tasklet.callback;
4052 static void guc_set_default_submission(struct intel_engine_cs *engine)
4054 engine->submit_request = guc_submit_request;
4057 static inline void guc_kernel_context_pin(struct intel_guc *guc,
4058 struct intel_context *ce)
4061 * Note: we purposefully do not check the returns below because
4062 * the registration can only fail if a reset is just starting.
4063 * This is called at the end of reset so presumably another reset
4064 * isn't happening, and even if it did this code would be run again.
4067 if (context_guc_id_invalid(ce))
4068 pin_guc_id(guc, ce);
4070 try_context_registration(ce, true);
4073 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
4075 struct intel_gt *gt = guc_to_gt(guc);
4076 struct intel_engine_cs *engine;
4077 enum intel_engine_id id;
4079 /* make sure all descriptors are clean... */
4080 xa_destroy(&guc->context_lookup);
4083 * Some contexts might have been pinned before we enabled GuC
4084 * submission, so we need to add them to the GuC bookkeeping.
4085 * Also, after a reset of the GuC we want to make sure that the
4086 * information shared with GuC is properly reset. The kernel LRCs are
4087 * not attached to the gem_context, so they need to be added separately.
4089 for_each_engine(engine, gt, id) {
4090 struct intel_context *ce;
4092 list_for_each_entry(ce, &engine->pinned_contexts_list,
4093 pinned_contexts_link)
4094 guc_kernel_context_pin(guc, ce);
4098 static void guc_release(struct intel_engine_cs *engine)
4100 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4102 intel_engine_cleanup_common(engine);
4103 lrc_fini_wa_ctx(engine);
4106 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4108 struct intel_engine_cs *e;
4109 intel_engine_mask_t tmp, mask = engine->mask;
4111 for_each_engine_masked(e, engine->gt, mask, tmp)
4115 static void guc_default_vfuncs(struct intel_engine_cs *engine)
4117 /* Default vfuncs which can be overridden by each engine. */
4119 engine->resume = guc_resume;
4121 engine->cops = &guc_context_ops;
4122 engine->request_alloc = guc_request_alloc;
4123 engine->add_active_request = add_to_context;
4124 engine->remove_active_request = remove_from_context;
4126 engine->sched_engine->schedule = i915_schedule;
4128 engine->reset.prepare = guc_engine_reset_prepare;
4129 engine->reset.rewind = guc_rewind_nop;
4130 engine->reset.cancel = guc_reset_nop;
4131 engine->reset.finish = guc_reset_nop;
4133 engine->emit_flush = gen8_emit_flush_xcs;
4134 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4135 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4136 if (GRAPHICS_VER(engine->i915) >= 12) {
4137 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4138 engine->emit_flush = gen12_emit_flush_xcs;
4140 engine->set_default_submission = guc_set_default_submission;
4141 engine->busyness = guc_engine_busyness;
4143 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4144 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4145 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4147 /* Wa_14014475959:dg2 */
4148 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
4149 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4152 * TODO: GuC supports timeslicing and semaphores as well, but they're
4153 * handled by the firmware so some minor tweaks are required before
4156 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4159 engine->emit_bb_start = gen8_emit_bb_start;
4160 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4161 engine->emit_bb_start = gen125_emit_bb_start;
4164 static void rcs_submission_override(struct intel_engine_cs *engine)
4166 switch (GRAPHICS_VER(engine->i915)) {
4168 engine->emit_flush = gen12_emit_flush_rcs;
4169 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4172 engine->emit_flush = gen11_emit_flush_rcs;
4173 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4176 engine->emit_flush = gen8_emit_flush_rcs;
4177 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4182 static inline void guc_default_irqs(struct intel_engine_cs *engine)
4184 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
4185 intel_engine_set_irq_handler(engine, cs_irq_handler);
4188 static void guc_sched_engine_destroy(struct kref *kref)
4190 struct i915_sched_engine *sched_engine =
4191 container_of(kref, typeof(*sched_engine), ref);
4192 struct intel_guc *guc = sched_engine->private_data;
4194 guc->sched_engine = NULL;
4195 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4196 kfree(sched_engine);
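/*
 * intel_guc_submission_setup - wire an engine up for GuC submission.
 * All engines share one sched_engine (the GuC firmware does the real
 * scheduling), created lazily on the first engine to be set up; the GuC
 * vfuncs, irq handler and breadcrumbs are then installed, with
 * render-specific overrides applied last, and the engine's sanitize and
 * release hooks are taken over.
 */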
4199 int intel_guc_submission_setup(struct intel_engine_cs *engine)
4201 struct drm_i915_private *i915 = engine->i915;
4202 struct intel_guc *guc = &engine->gt->uc.guc;
4205 * The setup relies on several assumptions (e.g. irqs always enabled)
4206 * that are only valid on gen11+
4208 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4210 if (!guc->sched_engine) {
4211 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4212 if (!guc->sched_engine)
4215 guc->sched_engine->schedule = i915_schedule;
4216 guc->sched_engine->disabled = guc_sched_engine_disabled;
4217 guc->sched_engine->private_data = guc;
4218 guc->sched_engine->destroy = guc_sched_engine_destroy;
4219 guc->sched_engine->bump_inflight_request_prio =
4220 guc_bump_inflight_request_prio;
4221 guc->sched_engine->retire_inflight_request_prio =
4222 guc_retire_inflight_request_prio;
4223 tasklet_setup(&guc->sched_engine->tasklet,
4224 guc_submission_tasklet);
4226 i915_sched_engine_put(engine->sched_engine);
4227 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4229 guc_default_vfuncs(engine);
4230 guc_default_irqs(engine);
4231 guc_init_breadcrumbs(engine);
4233 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4234 rcs_submission_override(engine);
4236 lrc_init_wa_ctx(engine);
4238 /* Finally, take ownership and responsibility for cleanup! */
4239 engine->sanitize = guc_sanitize;
4240 engine->release = guc_release;
4245 void intel_guc_submission_enable(struct intel_guc *guc)
4247 guc_init_lrc_mapping(guc);
4248 guc_init_engine_stats(guc);
4251 void intel_guc_submission_disable(struct intel_guc *guc)
4253 /* Note: By the time we're here, GuC may have already been reset */
4256 static bool __guc_submission_supported(struct intel_guc *guc)
4258 /* GuC submission is unavailable for pre-Gen11 */
4259 return intel_guc_is_supported(guc) &&
4260 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4263 static bool __guc_submission_selected(struct intel_guc *guc)
4265 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4267 if (!intel_guc_submission_is_supported(guc))
4270 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4273 void intel_guc_submission_init_early(struct intel_guc *guc)
4275 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4277 spin_lock_init(&guc->submission_state.lock);
4278 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4279 ida_init(&guc->submission_state.guc_ids);
4280 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4281 INIT_WORK(&guc->submission_state.destroyed_worker,
4282 destroyed_worker_func);
4283 INIT_WORK(&guc->submission_state.reset_fail_worker,
4284 reset_fail_worker_func);
4286 spin_lock_init(&guc->timestamp.lock);
4287 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4289 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4290 guc->submission_supported = __guc_submission_supported(guc);
4291 guc->submission_selected = __guc_submission_selected(guc);
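/*
 * Translate a guc_id from a G2H message into the owning intel_context,
 * rejecting out-of-range ids, ids with no registered context and child
 * contexts (G2H traffic is always in terms of the parent).
 */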
4294 static inline struct intel_context *
4295 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4297 struct intel_context *ce;
4299 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4300 drm_err(&guc_to_gt(guc)->i915->drm,
4301 "Invalid ctx_id %u\n", ctx_id);
4305 ce = __get_context(guc, ctx_id);
4306 if (unlikely(!ce)) {
4307 drm_err(&guc_to_gt(guc)->i915->drm,
4308 "Context is NULL, ctx_id %u\n", ctx_id);
4312 if (unlikely(intel_context_is_child(ce))) {
4313 drm_err(&guc_to_gt(guc)->i915->drm,
4314 "Context is child, ctx_id %u\n", ctx_id);
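/*
 * G2H handler: the GuC has finished deregistering a context. Either a
 * context waiting on the previous owner of its guc_id can now be
 * registered, or a destroyed context can finally be freed; in both cases
 * one outstanding G2H credit is returned.
 */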
4321 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4325 struct intel_context *ce;
4328 if (unlikely(len < 1)) {
4329 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4334 ce = g2h_context_lookup(guc, ctx_id);
4338 trace_intel_context_deregister_done(ce);
4340 #ifdef CONFIG_DRM_I915_SELFTEST
4341 if (unlikely(ce->drop_deregister)) {
4342 ce->drop_deregister = false;
4347 if (context_wait_for_deregister_to_register(ce)) {
4348 struct intel_runtime_pm *runtime_pm =
4349 &ce->engine->gt->i915->runtime_pm;
4350 intel_wakeref_t wakeref;
4353 * Previous owner of this guc_id has been deregistered, now safe to
4354 * register this context.
4356 with_intel_runtime_pm(runtime_pm, wakeref)
4357 register_context(ce, true);
4358 guc_signal_context_fence(ce);
4359 intel_context_put(ce);
4360 } else if (context_destroyed(ce)) {
4361 /* Context has been destroyed */
4362 intel_gt_pm_put_async(guc_to_gt(guc));
4363 release_guc_id(guc, ce);
4364 __guc_context_destroy(ce);
4367 decr_outstanding_submission_g2h(guc);
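/*
 * G2H handler: the GuC has completed a schedule enable or disable
 * request. A disable completion unpins the context, signals any fences
 * blocking further submissions and, if the context was banned while the
 * disable was in flight, cancels its remaining requests.
 */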
4372 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
4376 struct intel_context *ce;
4377 unsigned long flags;
4380 if (unlikely(len < 2)) {
4381 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4386 ce = g2h_context_lookup(guc, ctx_id);
4390 if (unlikely(context_destroyed(ce) ||
4391 (!context_pending_enable(ce) &&
4392 !context_pending_disable(ce)))) {
4393 drm_err(&guc_to_gt(guc)->i915->drm,
4394 "Bad context sched_state 0x%x, ctx_id %u\n",
4395 ce->guc_state.sched_state, ctx_id);
4399 trace_intel_context_sched_done(ce);
4401 if (context_pending_enable(ce)) {
4402 #ifdef CONFIG_DRM_I915_SELFTEST
4403 if (unlikely(ce->drop_schedule_enable)) {
4404 ce->drop_schedule_enable = false;
4409 spin_lock_irqsave(&ce->guc_state.lock, flags);
4410 clr_context_pending_enable(ce);
4411 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4412 } else if (context_pending_disable(ce)) {
4415 #ifdef CONFIG_DRM_I915_SELFTEST
4416 if (unlikely(ce->drop_schedule_disable)) {
4417 ce->drop_schedule_disable = false;
4423 * Unpin must be done before __guc_signal_context_fence,
4424 * otherwise a race exists between the requests getting
4425 * submitted + retired before this unpin completes resulting in
4426 * the pin_count going to zero and the context still being enabled.
4429 intel_context_sched_disable_unpin(ce);
4431 spin_lock_irqsave(&ce->guc_state.lock, flags);
4432 banned = context_banned(ce);
4433 clr_context_banned(ce);
4434 clr_context_pending_disable(ce);
4435 __guc_signal_context_fence(ce);
4436 guc_blocked_fence_complete(ce);
4437 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4440 guc_cancel_context_requests(ce);
4441 intel_engine_signal_breadcrumbs(ce->engine);
4445 decr_outstanding_submission_g2h(guc);
4446 intel_context_put(ce);
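/*
 * Record which context hung on which physical engine and trigger a
 * GuC-flavoured error capture for it, counting the event in the
 * per-class engine reset statistics.
 */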
4451 static void capture_error_state(struct intel_guc *guc,
4452 struct intel_context *ce)
4454 struct intel_gt *gt = guc_to_gt(guc);
4455 struct drm_i915_private *i915 = gt->i915;
4456 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
4457 intel_wakeref_t wakeref;
4459 intel_engine_set_hung_context(engine, ce);
4460 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4461 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4462 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
4465 static void guc_context_replay(struct intel_context *ce)
4467 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4469 __guc_reset_context(ce, ce->engine->mask);
4470 tasklet_hi_schedule(&sched_engine->tasklet);
4473 static void guc_handle_context_reset(struct intel_guc *guc,
4474 struct intel_context *ce)
4476 trace_intel_context_reset(ce);
4478 if (likely(!intel_context_is_banned(ce))) {
4479 capture_error_state(guc, ce);
4480 guc_context_replay(ce);
4482 drm_info(&guc_to_gt(guc)->i915->drm,
4483 "Ignoring context reset notification of banned context 0x%04X on %s",
4484 ce->guc_id.id, ce->engine->name);
4488 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
4489 const u32 *msg, u32 len)
4491 struct intel_context *ce;
4492 unsigned long flags;
4495 if (unlikely(len != 1)) {
4496 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4503 * The context lookup uses the xarray but lookups only require an RCU lock
4504 * not the full spinlock. So take the lock explicitly and keep it until the
4505 * context has had a reference taken, ensuring it can't be destroyed
4506 * asynchronously until the reset is done.
4508 xa_lock_irqsave(&guc->context_lookup, flags);
4509 ce = g2h_context_lookup(guc, ctx_id);
4511 intel_context_get(ce);
4512 xa_unlock_irqrestore(&guc->context_lookup, flags);
4517 guc_handle_context_reset(guc, ce);
4518 intel_context_put(ce);
4523 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4524 const u32 *msg, u32 len)
4528 if (unlikely(len != 1)) {
4529 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4533 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4534 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4535 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
4537 intel_guc_capture_process(guc);
4542 struct intel_engine_cs *
4543 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4545 struct intel_gt *gt = guc_to_gt(guc);
4546 u8 engine_class = guc_class_to_engine_class(guc_class);
4548 /* Class index is checked in class converter */
4549 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4551 return gt->engine_class[engine_class][instance];
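/*
 * Worker that escalates engine-reset failures reported by the GuC into a
 * full GT reset. It runs from a separate workqueue because a GT reset
 * flushes the G2H handler that queues it (see
 * intel_guc_engine_failure_process_msg()).
 */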
4554 static void reset_fail_worker_func(struct work_struct *w)
4556 struct intel_guc *guc = container_of(w, struct intel_guc,
4557 submission_state.reset_fail_worker);
4558 struct intel_gt *gt = guc_to_gt(guc);
4559 intel_engine_mask_t reset_fail_mask;
4560 unsigned long flags;
4562 spin_lock_irqsave(&guc->submission_state.lock, flags);
4563 reset_fail_mask = guc->submission_state.reset_fail_mask;
4564 guc->submission_state.reset_fail_mask = 0;
4565 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4567 if (likely(reset_fail_mask))
4568 intel_gt_handle_error(gt, reset_fail_mask,
4570 "GuC failed to reset engine mask=0x%x\n",
4574 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4575 const u32 *msg, u32 len)
4577 struct intel_engine_cs *engine;
4578 struct intel_gt *gt = guc_to_gt(guc);
4579 u8 guc_class, instance;
4581 unsigned long flags;
4583 if (unlikely(len != 3)) {
4584 drm_err(>->i915->drm, "Invalid length %u", len);
4592 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4593 if (unlikely(!engine)) {
4594 drm_err(>->i915->drm,
4595 "Invalid engine %d:%d", guc_class, instance);
4600 * This is an unexpected failure of a hardware feature. So, log a real
4601 * error message, not just the informational one that comes with the reset.
4603 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
4604 guc_class, instance, engine->name, reason);
4606 spin_lock_irqsave(&guc->submission_state.lock, flags);
4607 guc->submission_state.reset_fail_mask |= engine->mask;
4608 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4611 * A GT reset flushes this worker queue (G2H handler) so we must use
4612 * another worker to trigger a GT reset.
4614 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
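/*
 * With GuC submission there is no ELSP to inspect, so finding the hung
 * context means walking every registered context pinned to this engine
 * and picking the first one that still has an active request, then
 * flagging it for the error-capture code.
 */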
4619 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4621 struct intel_guc *guc = &engine->gt->uc.guc;
4622 struct intel_context *ce;
4623 struct i915_request *rq;
4624 unsigned long index;
4625 unsigned long flags;
4627 /* Reset called during driver load? GuC not yet initialised! */
4628 if (unlikely(!guc_submission_initialized(guc)))
4631 xa_lock_irqsave(&guc->context_lookup, flags);
4632 xa_for_each(&guc->context_lookup, index, ce) {
4633 if (!kref_get_unless_zero(&ce->ref))
4636 xa_unlock(&guc->context_lookup);
4638 if (!intel_context_is_pinned(ce))
4641 if (intel_engine_is_virtual(ce->engine)) {
4642 if (!(ce->engine->mask & engine->mask))
4645 if (ce->engine != engine)
4649 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
4650 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
4653 intel_engine_set_hung_context(engine, ce);
4655 /* Can only cope with one hang at a time... */
4656 intel_context_put(ce);
4657 xa_lock(&guc->context_lookup);
4661 intel_context_put(ce);
4662 xa_lock(&guc->context_lookup);
4665 xa_unlock_irqrestore(&guc->context_lookup, flags);
4668 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
4669 struct i915_request *hung_rq,
4670 struct drm_printer *m)
4672 struct intel_guc *guc = &engine->gt->uc.guc;
4673 struct intel_context *ce;
4674 unsigned long index;
4675 unsigned long flags;
4677 /* Reset called during driver load? GuC not yet initialised! */
4678 if (unlikely(!guc_submission_initialized(guc)))
4681 xa_lock_irqsave(&guc->context_lookup, flags);
4682 xa_for_each(&guc->context_lookup, index, ce) {
4683 if (!kref_get_unless_zero(&ce->ref))
4686 xa_unlock(&guc->context_lookup);
4688 if (!intel_context_is_pinned(ce))
4691 if (intel_engine_is_virtual(ce->engine)) {
4692 if (!(ce->engine->mask & engine->mask))
4695 if (ce->engine != engine)
4699 spin_lock(&ce->guc_state.lock);
4700 intel_engine_dump_active_requests(&ce->guc_state.requests,
4702 spin_unlock(&ce->guc_state.lock);
4705 intel_context_put(ce);
4706 xa_lock(&guc->context_lookup);
4708 xa_unlock_irqrestore(&guc->context_lookup, flags);
4711 void intel_guc_submission_print_info(struct intel_guc *guc,
4712 struct drm_printer *p)
4714 struct i915_sched_engine *sched_engine = guc->sched_engine;
4716 unsigned long flags;
4721 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
4722 atomic_read(&guc->outstanding_submission_g2h));
4723 drm_printf(p, "GuC tasklet count: %u\n\n",
4724 atomic_read(&sched_engine->tasklet.count));
4726 spin_lock_irqsave(&sched_engine->lock, flags);
4727 drm_printf(p, "Requests in GuC submit tasklet:\n");
4728 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
4729 struct i915_priolist *pl = to_priolist(rb);
4730 struct i915_request *rq;
4732 priolist_for_each_request(rq, pl)
4733 drm_printf(p, "guc_id=%u, seqno=%llu\n",
4734 rq->context->guc_id.id,
4737 spin_unlock_irqrestore(&sched_engine->lock, flags);
4738 drm_printf(p, "\n");
4741 static inline void guc_log_context_priority(struct drm_printer *p,
4742 struct intel_context *ce)
4746 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
4747 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
4748 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
4749 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
4750 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
4751 i, ce->guc_state.prio_count[i]);
4753 drm_printf(p, "\n");
4756 static inline void guc_log_context(struct drm_printer *p,
4757 struct intel_context *ce)
4759 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
4760 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
4761 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
4763 ce->lrc_reg_state[CTX_RING_HEAD]);
4764 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
4766 ce->lrc_reg_state[CTX_RING_TAIL]);
4767 drm_printf(p, "\t\tContext Pin Count: %u\n",
4768 atomic_read(&ce->pin_count));
4769 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
4770 atomic_read(&ce->guc_id.ref));
4771 drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
4772 ce->guc_state.sched_state);
4775 void intel_guc_submission_print_context_info(struct intel_guc *guc,
4776 struct drm_printer *p)
4778 struct intel_context *ce;
4779 unsigned long index;
4780 unsigned long flags;
4782 xa_lock_irqsave(&guc->context_lookup, flags);
4783 xa_for_each(&guc->context_lookup, index, ce) {
4784 GEM_BUG_ON(intel_context_is_child(ce));
4786 guc_log_context(p, ce);
4787 guc_log_context_priority(p, ce);
4789 if (intel_context_is_parent(ce)) {
4790 struct intel_context *child;
4792 drm_printf(p, "\t\tNumber children: %u\n",
4793 ce->parallel.number_children);
4795 if (ce->parallel.guc.wq_status) {
4796 drm_printf(p, "\t\tWQI Head: %u\n",
4797 READ_ONCE(*ce->parallel.guc.wq_head));
4798 drm_printf(p, "\t\tWQI Tail: %u\n",
4799 READ_ONCE(*ce->parallel.guc.wq_tail));
4800 drm_printf(p, "\t\tWQI Status: %u\n\n",
4801 READ_ONCE(*ce->parallel.guc.wq_status));
4804 if (ce->engine->emit_bb_start ==
4805 emit_bb_start_parent_no_preempt_mid_batch) {
4808 drm_printf(p, "\t\tChildren Go: %u\n\n",
4809 get_children_go_value(ce));
4810 for (i = 0; i < ce->parallel.number_children; ++i)
4811 drm_printf(p, "\t\tChildren Join: %u\n",
4812 get_children_join_value(ce, i));
4815 for_each_child(ce, child)
4816 guc_log_context(p, child);
4819 xa_unlock_irqrestore(&guc->context_lookup, flags);
4822 static inline u32 get_children_go_addr(struct intel_context *ce)
4824 GEM_BUG_ON(!intel_context_is_parent(ce));
4826 return i915_ggtt_offset(ce->state) +
4827 __get_parent_scratch_offset(ce) +
4828 offsetof(struct parent_scratch, go.semaphore);
4831 static inline u32 get_children_join_addr(struct intel_context *ce,
4834 GEM_BUG_ON(!intel_context_is_parent(ce));
4836 return i915_ggtt_offset(ce->state) +
4837 __get_parent_scratch_offset(ce) +
4838 offsetof(struct parent_scratch, join[child_index].semaphore);
4841 #define PARENT_GO_BB 1
4842 #define PARENT_GO_FINI_BREADCRUMB 0
4843 #define CHILD_GO_BB 1
4844 #define CHILD_GO_FINI_BREADCRUMB 0
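/*
 * Multi-LRC batch-start handshake, built from GGTT semaphores in the
 * parent context's scratch page: each child writes PARENT_GO_BB to its
 * own join semaphore and then waits for the parent to write CHILD_GO_BB
 * to the shared go semaphore. The parent waits for every join before
 * disabling preemption and releasing the children, so all members of the
 * parallel submission enter their batches together and cannot be
 * preempted mid-batch.
 */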
4845 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
4846 u64 offset, u32 len,
4847 const unsigned int flags)
4849 struct intel_context *ce = rq->context;
4853 GEM_BUG_ON(!intel_context_is_parent(ce));
4855 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
4859 /* Wait on children */
4860 for (i = 0; i < ce->parallel.number_children; ++i) {
4861 *cs++ = (MI_SEMAPHORE_WAIT |
4862 MI_SEMAPHORE_GLOBAL_GTT |
4864 MI_SEMAPHORE_SAD_EQ_SDD);
4865 *cs++ = PARENT_GO_BB;
4866 *cs++ = get_children_join_addr(ce, i);
4870 /* Turn off preemption */
4871 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4874 /* Tell children go */
4875 cs = gen8_emit_ggtt_write(cs,
4877 get_children_go_addr(ce),
4881 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4882 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4883 *cs++ = lower_32_bits(offset);
4884 *cs++ = upper_32_bits(offset);
4887 intel_ring_advance(rq, cs);
4892 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
4893 u64 offset, u32 len,
4894 const unsigned int flags)
4896 struct intel_context *ce = rq->context;
4897 struct intel_context *parent = intel_context_to_parent(ce);
4900 GEM_BUG_ON(!intel_context_is_child(ce));
4902 cs = intel_ring_begin(rq, 12);
4907 cs = gen8_emit_ggtt_write(cs,
4909 get_children_join_addr(parent,
4910 ce->parallel.child_index),
4913 /* Wait on parent for go */
4914 *cs++ = (MI_SEMAPHORE_WAIT |
4915 MI_SEMAPHORE_GLOBAL_GTT |
4917 MI_SEMAPHORE_SAD_EQ_SDD);
4918 *cs++ = CHILD_GO_BB;
4919 *cs++ = get_children_go_addr(parent);
4922 /* Turn off preemption */
4923 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4926 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4927 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4928 *cs++ = lower_32_bits(offset);
4929 *cs++ = upper_32_bits(offset);
4931 intel_ring_advance(rq, cs);
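/*
 * The fini-breadcrumb handshake mirrors the batch-start one: the parent
 * waits for every child to signal PARENT_GO_FINI_BREADCRUMB, re-enables
 * preemption and then releases the children with CHILD_GO_FINI_BREADCRUMB,
 * so no seqno is written until every member of the parallel submission
 * has left its batch.
 */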
4937 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4940 struct intel_context *ce = rq->context;
4943 GEM_BUG_ON(!intel_context_is_parent(ce));
4945 /* Wait on children */
4946 for (i = 0; i < ce->parallel.number_children; ++i) {
4947 *cs++ = (MI_SEMAPHORE_WAIT |
4948 MI_SEMAPHORE_GLOBAL_GTT |
4950 MI_SEMAPHORE_SAD_EQ_SDD);
4951 *cs++ = PARENT_GO_FINI_BREADCRUMB;
4952 *cs++ = get_children_join_addr(ce, i);
4956 /* Turn on preemption */
4957 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4960 /* Tell children go */
4961 cs = gen8_emit_ggtt_write(cs,
4962 CHILD_GO_FINI_BREADCRUMB,
4963 get_children_go_addr(ce),
4970 * If this is true, a submission of multi-lrc requests had an error and the
4971 * requests need to be skipped. The front end (execbuf IOCTL) should've called
4972 * i915_request_skip which squashes the BB but we still need to emit the fini
4973 * breadcrumb seqno write. At this point we don't know how many of the
4974 * requests in the multi-lrc submission were generated so we can't do the
4975 * handshake between the parent and children (e.g. if 4 requests should be
4976 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
4977 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
4978 * has occurred on any of the requests in the submission / relationship.
4980 static inline bool skip_handshake(struct i915_request *rq)
4982 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
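/*
 * NON_SKIP_LEN is the number of dwords emitted unconditionally after the
 * (possibly NOPed-out) handshake: the 4-dword seqno GGTT write plus the
 * user interrupt and padding (6 dwords in total). The GEM_BUG_ONs below
 * verify that this skip/emit arithmetic stays in sync with
 * emit_fini_breadcrumb_dw.
 */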
4985 #define NON_SKIP_LEN 6
4987 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4990 struct intel_context *ce = rq->context;
4991 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
4992 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
4994 GEM_BUG_ON(!intel_context_is_parent(ce));
4996 if (unlikely(skip_handshake(rq))) {
4998 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
4999 * the NON_SKIP_LEN comes from the length of the emits below.
5001 memset(cs, 0, sizeof(u32) *
5002 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5003 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5005 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
5008 /* Emit fini breadcrumb */
5009 before_fini_breadcrumb_user_interrupt_cs = cs;
5010 cs = gen8_emit_ggtt_write(cs,
5012 i915_request_active_timeline(rq)->hwsp_offset,
5015 /* User interrupt */
5016 *cs++ = MI_USER_INTERRUPT;
5019 /* Ensure our math for skip + emit is correct */
5020 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5022 GEM_BUG_ON(start_fini_breadcrumb_cs +
5023 ce->engine->emit_fini_breadcrumb_dw != cs);
5025 rq->tail = intel_ring_offset(rq, cs);
5031 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5034 struct intel_context *ce = rq->context;
5035 struct intel_context *parent = intel_context_to_parent(ce);
5037 GEM_BUG_ON(!intel_context_is_child(ce));
5039 /* Turn on preemption */
5040 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5044 cs = gen8_emit_ggtt_write(cs,
5045 PARENT_GO_FINI_BREADCRUMB,
5046 get_children_join_addr(parent,
5047 ce->parallel.child_index),
5050 /* Wait on parent for go */
5051 *cs++ = (MI_SEMAPHORE_WAIT |
5052 MI_SEMAPHORE_GLOBAL_GTT |
5054 MI_SEMAPHORE_SAD_EQ_SDD);
5055 *cs++ = CHILD_GO_FINI_BREADCRUMB;
5056 *cs++ = get_children_go_addr(parent);
5063 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5066 struct intel_context *ce = rq->context;
5067 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5068 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5070 GEM_BUG_ON(!intel_context_is_child(ce));
5072 if (unlikely(skip_handshake(rq))) {
5074 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
5075 * the NON_SKIP_LEN comes from the length of the emits below.
5077 memset(cs, 0, sizeof(u32) *
5078 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5079 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5081 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
5084 /* Emit fini breadcrumb */
5085 before_fini_breadcrumb_user_interrupt_cs = cs;
5086 cs = gen8_emit_ggtt_write(cs,
5088 i915_request_active_timeline(rq)->hwsp_offset,
5091 /* User interrupt */
5092 *cs++ = MI_USER_INTERRUPT;
5095 /* Ensure our math for skip + emit is correct */
5096 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5098 GEM_BUG_ON(start_fini_breadcrumb_cs +
5099 ce->engine->emit_fini_breadcrumb_dw != cs);
5101 rq->tail = intel_ring_offset(rq, cs);
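/*
 * Create a GuC virtual engine: a software-only intel_engine_cs whose mask
 * is the union of its siblings'. The GuC firmware picks the physical
 * engine at submission time, so the virtual engine mostly borrows vfuncs,
 * breadcrumbs and scheduling properties from its first sibling and only
 * validates that all siblings share the same class.
 */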
5108 static struct intel_context *
5109 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
5110 unsigned long flags)
5112 struct guc_virtual_engine *ve;
5113 struct intel_guc *guc;
5117 ve = kzalloc(sizeof(*ve), GFP_KERNEL);
5119 return ERR_PTR(-ENOMEM);
5121 guc = &siblings[0]->gt->uc.guc;
5123 ve->base.i915 = siblings[0]->i915;
5124 ve->base.gt = siblings[0]->gt;
5125 ve->base.uncore = siblings[0]->uncore;
5128 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5129 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5130 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5131 ve->base.saturated = ALL_ENGINES;
5133 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5135 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
5137 ve->base.cops = &virtual_guc_context_ops;
5138 ve->base.request_alloc = guc_request_alloc;
5139 ve->base.bump_serial = virtual_guc_bump_serial;
5141 ve->base.submit_request = guc_submit_request;
5143 ve->base.flags = I915_ENGINE_IS_VIRTUAL;
5145 intel_context_init(&ve->context, &ve->base);
5147 for (n = 0; n < count; n++) {
5148 struct intel_engine_cs *sibling = siblings[n];
5150 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5151 if (sibling->mask & ve->base.mask) {
5152 DRM_DEBUG("duplicate %s entry in load balancer\n",
5158 ve->base.mask |= sibling->mask;
5159 ve->base.logical_mask |= sibling->logical_mask;
5161 if (n != 0 && ve->base.class != sibling->class) {
5162 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5163 sibling->class, ve->base.class);
5166 } else if (n == 0) {
5167 ve->base.class = sibling->class;
5168 ve->base.uabi_class = sibling->uabi_class;
5169 snprintf(ve->base.name, sizeof(ve->base.name),
5170 "v%dx%d", ve->base.class, count);
5171 ve->base.context_size = sibling->context_size;
5173 ve->base.add_active_request =
5174 sibling->add_active_request;
5175 ve->base.remove_active_request =
5176 sibling->remove_active_request;
5177 ve->base.emit_bb_start = sibling->emit_bb_start;
5178 ve->base.emit_flush = sibling->emit_flush;
5179 ve->base.emit_init_breadcrumb =
5180 sibling->emit_init_breadcrumb;
5181 ve->base.emit_fini_breadcrumb =
5182 sibling->emit_fini_breadcrumb;
5183 ve->base.emit_fini_breadcrumb_dw =
5184 sibling->emit_fini_breadcrumb_dw;
5185 ve->base.breadcrumbs =
5186 intel_breadcrumbs_get(sibling->breadcrumbs);
5188 ve->base.flags |= sibling->flags;
5190 ve->base.props.timeslice_duration_ms =
5191 sibling->props.timeslice_duration_ms;
5192 ve->base.props.preempt_timeout_ms =
5193 sibling->props.preempt_timeout_ms;
5197 return &ve->context;
5200 intel_context_put(&ve->context);
5201 return ERR_PTR(err);
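/*
 * A virtual engine is considered to have a heartbeat if any of its
 * physical siblings has a non-zero heartbeat interval.
 */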
5204 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
5206 struct intel_engine_cs *engine;
5207 intel_engine_mask_t tmp, mask = ve->mask;
5209 for_each_engine_masked(engine, ve->gt, mask, tmp)
5210 if (READ_ONCE(engine->props.heartbeat_interval_ms))
5216 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5217 #include "selftest_guc.c"
5218 #include "selftest_guc_multi_lrc.c"