drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2014 Intel Corporation
   4  */
   5
   6 #include <linux/circ_buf.h>
   7
   8 #include "gem/i915_gem_context.h"
   9 #include "gt/gen8_engine_cs.h"
  10 #include "gt/intel_breadcrumbs.h"
  11 #include "gt/intel_context.h"
  12 #include "gt/intel_engine_heartbeat.h"
  13 #include "gt/intel_engine_pm.h"
  14 #include "gt/intel_engine_regs.h"
  15 #include "gt/intel_gpu_commands.h"
  16 #include "gt/intel_gt.h"
  17 #include "gt/intel_gt_clock_utils.h"
  18 #include "gt/intel_gt_irq.h"
  19 #include "gt/intel_gt_pm.h"
  20 #include "gt/intel_gt_regs.h"
  21 #include "gt/intel_gt_requests.h"
  22 #include "gt/intel_lrc.h"
  23 #include "gt/intel_lrc_reg.h"
  24 #include "gt/intel_mocs.h"
  25 #include "gt/intel_ring.h"
  26
  27 #include "intel_guc_ads.h"
  28 #include "intel_guc_capture.h"
  29 #include "intel_guc_submission.h"
  30
  31 #include "i915_drv.h"
  32 #include "i915_trace.h"
  33
  34 /**
  35  * DOC: GuC-based command submission
  36  *
  37  * The Scratch registers:
  38  * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
  39  * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
  40  * triggers an interrupt on the GuC via another register write (0xC4C8).
  41  * Firmware writes a success/fail code back to the action register after
  42  * processing the request. The kernel driver polls waiting for this update and
  43  * then proceeds.
  44  *
  45  * Command Transport buffers (CTBs):
  46  * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
  47  * - G2H) are a message interface between the i915 and GuC.
  48  *
  49  * Context registration:
  50  * Before a context can be submitted it must be registered with the GuC via a
  51  * H2G. A unique guc_id is associated with each context. The context is either
  52  * registered at request creation time (normal operation) or at submission time
  53  * (abnormal operation, e.g. after a reset).
  54  *
  55  * Context submission:
  56  * The i915 updates the LRC tail value in memory. The i915 must enable the
  57  * scheduling of the context within the GuC for the GuC to actually consider it.
  58  * Therefore, the first time a disabled context is submitted we use a schedule
  59  * enable H2G, while follow up submissions are done via the context submit H2G,
  60  * which informs the GuC that a previously enabled context has new work
  61  * available.
  62  *
  63  * Context unpin:
  64  * To unpin a context a H2G is used to disable scheduling. When the
  65  * corresponding G2H returns indicating the scheduling disable operation has
  66  * completed it is safe to unpin the context. While a disable is in flight it
  67  * isn't safe to resubmit the context so a fence is used to stall all future
  68  * requests of that context until the G2H is returned.
  69  *
  70  * Context deregistration:
  71  * Before a context can be destroyed or if we steal its guc_id we must
  72  * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
  73  * safe to submit anything to this guc_id until the deregister completes so a
  74  * fence is used to stall all requests associated with this guc_id until the
  75  * corresponding G2H returns indicating the guc_id has been deregistered.
  76  *
  77  * submission_state.guc_ids:
  78  * Unique number associated with private GuC context data passed in during
  79  * context registration / submission / deregistration. 64k available. Simple ida
  80  * is used for allocation.
  81  *
  82  * Stealing guc_ids:
  83  * If no guc_ids are available they can be stolen from another context at
  84  * request creation time if that context is unpinned. If a guc_id can't be found
  85  * we punt this problem to the user as we believe this is near impossible to hit
  86  * during normal use cases.
  87  *
  88  * Locking:
  89  * In the GuC submission code we have 3 basic spin locks which protect
  90  * everything. Details about each below.
  91  *
  92  * sched_engine->lock
  93  * This is the submission lock for all contexts that share an i915 schedule
  94  * engine (sched_engine), thus only one of the contexts which share a
  95  * sched_engine can be submitting at a time. Currently only one sched_engine is
  96  * used for all of GuC submission but that could change in the future.
  97  *
  98  * guc->submission_state.lock
  99  * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 100  * list.
 101  *
 102  * ce->guc_state.lock
 103  * Protects everything under ce->guc_state. Ensures that a context is in the
 104  * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 105  * on a disabled context (bad idea), we don't issue a schedule enable when a
 106  * schedule disable is in flight, etc... Also protects list of inflight requests
 107  * on the context and the priority management state. Lock is individual to each
 108  * context.
 109  *
 110  * Lock ordering rules:
 111  * sched_engine->lock -> ce->guc_state.lock
 112  * guc->submission_state.lock -> ce->guc_state.lock
 113  *
 114  * Reset races:
 115  * When a full GT reset is triggered it is assumed that some G2H responses to
 116  * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 117  * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 118  * contexts, release guc_ids, etc...). When this occurs we can scrub the
 119  * context state and cleanup appropriately, however this is quite racy.
 120  * To avoid races, the reset code must disable submission before scrubbing for
 121  * the missing G2H, while the submission code must check for submission being
 122  * disabled and skip sending H2Gs and updating context states when it is. Both
 123  * sides must also make sure to hold the relevant locks.
 124  */
 125
 126 /* GuC Virtual Engine */
 127 struct guc_virtual_engine {
 128         struct intel_engine_cs base;
 129         struct intel_context context;
 130 };
 131
 132 static struct intel_context *
 133 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
 134                    unsigned long flags);
 135
 136 static struct intel_context *
 137 guc_create_parallel(struct intel_engine_cs **engines,
 138                     unsigned int num_siblings,
 139                     unsigned int width);
 140
 141 #define GUC_REQUEST_SIZE 64 /* bytes */
 142
 143 /*
 144  * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 145  * per the GuC submission interface. A different allocation algorithm is used
 146  * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 147  * partition the guc_id space. We believe the number of multi-lrc contexts in
 148  * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 149  * multi-lrc.
 150  */
 151 #define NUMBER_MULTI_LRC_GUC_ID(guc)    \
 152         ((guc)->submission_state.num_guc_ids / 16)
 153
 154 /*
 155  * Below is a set of functions which control the GuC scheduling state which
 156  * require a lock.
 157  */
 158 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER     BIT(0)
 159 #define SCHED_STATE_DESTROYED                           BIT(1)
 160 #define SCHED_STATE_PENDING_DISABLE                     BIT(2)
 161 #define SCHED_STATE_BANNED                              BIT(3)
 162 #define SCHED_STATE_ENABLED                             BIT(4)
 163 #define SCHED_STATE_PENDING_ENABLE                      BIT(5)
 164 #define SCHED_STATE_REGISTERED                          BIT(6)
 165 #define SCHED_STATE_POLICY_REQUIRED                     BIT(7)
 166 #define SCHED_STATE_BLOCKED_SHIFT                       8
 167 #define SCHED_STATE_BLOCKED             BIT(SCHED_STATE_BLOCKED_SHIFT)
 168 #define SCHED_STATE_BLOCKED_MASK        (0xfff << SCHED_STATE_BLOCKED_SHIFT)
 169
 170 static inline void init_sched_state(struct intel_context *ce)
 171 {
 172         lockdep_assert_held(&ce->guc_state.lock);
 173         ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
 174 }
 175
 176 __maybe_unused
 177 static bool sched_state_is_init(struct intel_context *ce)
 178 {
 179         /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
 180         return !(ce->guc_state.sched_state &
 181                  ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
 182 }
 183
 184 static inline bool
 185 context_wait_for_deregister_to_register(struct intel_context *ce)
 186 {
 187         return ce->guc_state.sched_state &
 188                 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
 189 }
 190
 191 static inline void
 192 set_context_wait_for_deregister_to_register(struct intel_context *ce)
 193 {
 194         lockdep_assert_held(&ce->guc_state.lock);
 195         ce->guc_state.sched_state |=
 196                 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
 197 }
 198
 199 static inline void
 200 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
 201 {
 202         lockdep_assert_held(&ce->guc_state.lock);
 203         ce->guc_state.sched_state &=
 204                 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
 205 }
 206
 207 static inline bool
 208 context_destroyed(struct intel_context *ce)
 209 {
 210         return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
 211 }
 212
 213 static inline void
 214 set_context_destroyed(struct intel_context *ce)
 215 {
 216         lockdep_assert_held(&ce->guc_state.lock);
 217         ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
 218 }
 219
 220 static inline bool context_pending_disable(struct intel_context *ce)
 221 {
 222         return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
 223 }
 224
 225 static inline void set_context_pending_disable(struct intel_context *ce)
 226 {
 227         lockdep_assert_held(&ce->guc_state.lock);
 228         ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
 229 }
 230
 231 static inline void clr_context_pending_disable(struct intel_context *ce)
 232 {
 233         lockdep_assert_held(&ce->guc_state.lock);
 234         ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
 235 }
 236
 237 static inline bool context_banned(struct intel_context *ce)
 238 {
 239         return ce->guc_state.sched_state & SCHED_STATE_BANNED;
 240 }
 241
 242 static inline void set_context_banned(struct intel_context *ce)
 243 {
 244         lockdep_assert_held(&ce->guc_state.lock);
 245         ce->guc_state.sched_state |= SCHED_STATE_BANNED;
 246 }
 247
 248 static inline void clr_context_banned(struct intel_context *ce)
 249 {
 250         lockdep_assert_held(&ce->guc_state.lock);
 251         ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
 252 }
 253
 254 static inline bool context_enabled(struct intel_context *ce)
 255 {
 256         return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
 257 }
 258
 259 static inline void set_context_enabled(struct intel_context *ce)
 260 {
 261         lockdep_assert_held(&ce->guc_state.lock);
 262         ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
 263 }
 264
 265 static inline void clr_context_enabled(struct intel_context *ce)
 266 {
 267         lockdep_assert_held(&ce->guc_state.lock);
 268         ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
 269 }
 270
 271 static inline bool context_pending_enable(struct intel_context *ce)
 272 {
 273         return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
 274 }
 275
 276 static inline void set_context_pending_enable(struct intel_context *ce)
 277 {
 278         lockdep_assert_held(&ce->guc_state.lock);
 279         ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
 280 }
 281
 282 static inline void clr_context_pending_enable(struct intel_context *ce)
 283 {
 284         lockdep_assert_held(&ce->guc_state.lock);
 285         ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
 286 }
 287
 288 static inline bool context_registered(struct intel_context *ce)
 289 {
 290         return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
 291 }
 292
 293 static inline void set_context_registered(struct intel_context *ce)
 294 {
 295         lockdep_assert_held(&ce->guc_state.lock);
 296         ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
 297 }
 298
 299 static inline void clr_context_registered(struct intel_context *ce)
 300 {
 301         lockdep_assert_held(&ce->guc_state.lock);
 302         ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
 303 }
 304
 305 static inline bool context_policy_required(struct intel_context *ce)
 306 {
 307         return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
 308 }
 309
 310 static inline void set_context_policy_required(struct intel_context *ce)
 311 {
 312         lockdep_assert_held(&ce->guc_state.lock);
 313         ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
 314 }
 315
 316 static inline void clr_context_policy_required(struct intel_context *ce)
 317 {
 318         lockdep_assert_held(&ce->guc_state.lock);
 319         ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
 320 }
 321
 322 static inline u32 context_blocked(struct intel_context *ce)
 323 {
 324         return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
 325                 SCHED_STATE_BLOCKED_SHIFT;
 326 }
 327
 328 static inline void incr_context_blocked(struct intel_context *ce)
 329 {
 330         lockdep_assert_held(&ce->guc_state.lock);
 331
 332         ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
 333
 334         GEM_BUG_ON(!context_blocked(ce));       /* Overflow check */
 335 }
 336
 337 static inline void decr_context_blocked(struct intel_context *ce)
 338 {
 339         lockdep_assert_held(&ce->guc_state.lock);
 340
 341         GEM_BUG_ON(!context_blocked(ce));       /* Underflow check */
 342
 343         ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
 344 }
 345
 346 static inline bool context_has_committed_requests(struct intel_context *ce)
 347 {
 348         return !!ce->guc_state.number_committed_requests;
 349 }
 350
 351 static inline void incr_context_committed_requests(struct intel_context *ce)
 352 {
 353         lockdep_assert_held(&ce->guc_state.lock);
 354         ++ce->guc_state.number_committed_requests;
 355         GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
 356 }
 357
 358 static inline void decr_context_committed_requests(struct intel_context *ce)
 359 {
 360         lockdep_assert_held(&ce->guc_state.lock);
 361         --ce->guc_state.number_committed_requests;
 362         GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
 363 }
 364
 365 static struct intel_context *
 366 request_to_scheduling_context(struct i915_request *rq)
 367 {
 368         return intel_context_to_parent(rq->context);
 369 }
 370
 371 static inline bool context_guc_id_invalid(struct intel_context *ce)
 372 {
 373         return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
 374 }
 375
 376 static inline void set_context_guc_id_invalid(struct intel_context *ce)
 377 {
 378         ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
 379 }
 380
 381 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
 382 {
 383         return &ce->engine->gt->uc.guc;
 384 }
 385
 386 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 387 {
 388         return rb_entry(rb, struct i915_priolist, node);
 389 }
 390
 391 /*
 392  * When using multi-lrc submission a scratch memory area is reserved in the
 393  * parent's context state for the process descriptor, work queue, and handshake
 394  * between the parent + children contexts to insert safe preemption points
 395  * between each of the BBs. Currently the scratch area is sized to a page.
 396  *
 397  * The layout of this scratch area is below:
 398  * 0                                            guc_process_desc
 399  * + sizeof(struct guc_process_desc)            child go
 400  * + CACHELINE_BYTES                            child join[0]
 401  * ...
 402  * + CACHELINE_BYTES                            child join[n - 1]
 403  * ...                                          unused
 404  * PARENT_SCRATCH_SIZE / 2                      work queue start
 405  * ...                                          work queue
 406  * PARENT_SCRATCH_SIZE - 1                      work queue end
 407  */
 408 #define WQ_SIZE                 (PARENT_SCRATCH_SIZE / 2)
 409 #define WQ_OFFSET               (PARENT_SCRATCH_SIZE - WQ_SIZE)
 410
 411 struct sync_semaphore {
 412         u32 semaphore;
 413         u8 unused[CACHELINE_BYTES - sizeof(u32)];
 414 };
 415
 416 struct parent_scratch {
 417         union guc_descs {
 418                 struct guc_sched_wq_desc wq_desc;
 419                 struct guc_process_desc_v69 pdesc;
 420         } descs;
 421
 422         struct sync_semaphore go;
 423         struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
 424
 425         u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
 426                 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
 427
 428         u32 wq[WQ_SIZE / sizeof(u32)];
 429 };
 430
 431 static u32 __get_parent_scratch_offset(struct intel_context *ce)
 432 {
 433         GEM_BUG_ON(!ce->parallel.guc.parent_page);
 434
 435         return ce->parallel.guc.parent_page * PAGE_SIZE;
 436 }
 437
 438 static u32 __get_wq_offset(struct intel_context *ce)
 439 {
 440         BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
 441
 442         return __get_parent_scratch_offset(ce) + WQ_OFFSET;
 443 }
 444
 445 static struct parent_scratch *
 446 __get_parent_scratch(struct intel_context *ce)
 447 {
 448         BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
 449         BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
 450
 451         /*
 452          * Need to subtract LRC_STATE_OFFSET here as the
 453          * parallel.guc.parent_page is the offset into ce->state while
 454          * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
 455          */
 456         return (struct parent_scratch *)
 457                 (ce->lrc_reg_state +
 458                  ((__get_parent_scratch_offset(ce) -
 459                    LRC_STATE_OFFSET) / sizeof(u32)));
 460 }
 461
 462 static struct guc_process_desc_v69 *
 463 __get_process_desc_v69(struct intel_context *ce)
 464 {
 465         struct parent_scratch *ps = __get_parent_scratch(ce);
 466
 467         return &ps->descs.pdesc;
 468 }
 469
 470 static struct guc_sched_wq_desc *
 471 __get_wq_desc_v70(struct intel_context *ce)
 472 {
 473         struct parent_scratch *ps = __get_parent_scratch(ce);
 474
 475         return &ps->descs.wq_desc;
 476 }
 477
 478 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
 479 {
 480         /*
 481          * Check for space in work queue. Caching a value of head pointer in
 482          * intel_context structure in order reduce the number accesses to shared
 483          * GPU memory which may be across a PCIe bus.
 484          */
 485 #define AVAILABLE_SPACE \
 486         CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
 487         if (wqi_size > AVAILABLE_SPACE) {
 488                 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
 489
 490                 if (wqi_size > AVAILABLE_SPACE)
 491                         return NULL;
 492         }
 493 #undef AVAILABLE_SPACE
 494
 495         return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
 496 }
 497
 498 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
 499 {
 500         struct intel_context *ce = xa_load(&guc->context_lookup, id);
 501
 502         GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
 503
 504         return ce;
 505 }
 506
 507 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
 508 {
 509         struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
 510
 511         if (!base)
 512                 return NULL;
 513
 514         GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
 515
 516         return &base[index];
 517 }
 518
 519 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
 520 {
 521         u32 size;
 522         int ret;
 523
 524         size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
 525                           GUC_MAX_CONTEXT_ID);
 526         ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
 527                                              (void **)&guc->lrc_desc_pool_vaddr_v69);
 528         if (ret)
 529                 return ret;
 530
 531         return 0;
 532 }
 533
 534 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
 535 {
 536         if (!guc->lrc_desc_pool_vaddr_v69)
 537                 return;
 538
 539         guc->lrc_desc_pool_vaddr_v69 = NULL;
 540         i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
 541 }
 542
 543 static inline bool guc_submission_initialized(struct intel_guc *guc)
 544 {
 545         return guc->submission_initialized;
 546 }
 547
 548 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
 549 {
 550         struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
 551
 552         if (desc)
 553                 memset(desc, 0, sizeof(*desc));
 554 }
 555
 556 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
 557 {
 558         return __get_context(guc, id);
 559 }
 560
 561 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
 562                                       struct intel_context *ce)
 563 {
 564         unsigned long flags;
 565
 566         /*
 567          * xarray API doesn't have xa_save_irqsave wrapper, so calling the
 568          * lower level functions directly.
 569          */
 570         xa_lock_irqsave(&guc->context_lookup, flags);
 571         __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
 572         xa_unlock_irqrestore(&guc->context_lookup, flags);
 573 }
 574
 575 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
 576 {
 577         unsigned long flags;
 578
 579         if (unlikely(!guc_submission_initialized(guc)))
 580                 return;
 581
 582         _reset_lrc_desc_v69(guc, id);
 583
 584         /*
 585          * xarray API doesn't have xa_erase_irqsave wrapper, so calling
 586          * the lower level functions directly.
 587          */
 588         xa_lock_irqsave(&guc->context_lookup, flags);
 589         __xa_erase(&guc->context_lookup, id);
 590         xa_unlock_irqrestore(&guc->context_lookup, flags);
 591 }
 592
 593 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
 594 {
 595         if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
 596                 wake_up_all(&guc->ct.wq);
 597 }
 598
 599 static int guc_submission_send_busy_loop(struct intel_guc *guc,
 600                                          const u32 *action,
 601                                          u32 len,
 602                                          u32 g2h_len_dw,
 603                                          bool loop)
 604 {
 605         /*
 606          * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
 607          * so we don't handle the case where we don't get a reply because we
 608          * aborted the send due to the channel being busy.
 609          */
 610         GEM_BUG_ON(g2h_len_dw && !loop);
 611
 612         if (g2h_len_dw)
 613                 atomic_inc(&guc->outstanding_submission_g2h);
 614
 615         return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
 616 }
 617
 618 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
 619                                    atomic_t *wait_var,
 620                                    bool interruptible,
 621                                    long timeout)
 622 {
 623         const int state = interruptible ?
 624                 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 625         DEFINE_WAIT(wait);
 626
 627         might_sleep();
 628         GEM_BUG_ON(timeout < 0);
 629
 630         if (!atomic_read(wait_var))
 631                 return 0;
 632
 633         if (!timeout)
 634                 return -ETIME;
 635
 636         for (;;) {
 637                 prepare_to_wait(&guc->ct.wq, &wait, state);
 638
 639                 if (!atomic_read(wait_var))
 640                         break;
 641
 642                 if (signal_pending_state(state, current)) {
 643                         timeout = -EINTR;
 644                         break;
 645                 }
 646
 647                 if (!timeout) {
 648                         timeout = -ETIME;
 649                         break;
 650                 }
 651
 652                 timeout = io_schedule_timeout(timeout);
 653         }
 654         finish_wait(&guc->ct.wq, &wait);
 655
 656         return (timeout < 0) ? timeout : 0;
 657 }
 658
 659 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
 660 {
 661         if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
 662                 return 0;
 663
 664         return intel_guc_wait_for_pending_msg(guc,
 665                                               &guc->outstanding_submission_g2h,
 666                                               true, timeout);
 667 }
 668
 669 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
 670 static int try_context_registration(struct intel_context *ce, bool loop);
 671
 672 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 673 {
 674         int err = 0;
 675         struct intel_context *ce = request_to_scheduling_context(rq);
 676         u32 action[3];
 677         int len = 0;
 678         u32 g2h_len_dw = 0;
 679         bool enabled;
 680
 681         lockdep_assert_held(&rq->engine->sched_engine->lock);
 682
 683         /*
 684          * Corner case where requests were sitting in the priority list or a
 685          * request resubmitted after the context was banned.
 686          */
 687         if (unlikely(intel_context_is_banned(ce))) {
 688                 i915_request_put(i915_request_mark_eio(rq));
 689                 intel_engine_signal_breadcrumbs(ce->engine);
 690                 return 0;
 691         }
 692
 693         GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
 694         GEM_BUG_ON(context_guc_id_invalid(ce));
 695
 696         if (context_policy_required(ce)) {
 697                 err = guc_context_policy_init_v70(ce, false);
 698                 if (err)
 699                         return err;
 700         }
 701
 702         spin_lock(&ce->guc_state.lock);
 703
 704         /*
 705          * The request / context will be run on the hardware when scheduling
 706          * gets enabled in the unblock. For multi-lrc we still submit the
 707          * context to move the LRC tails.
 708          */
 709         if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
 710                 goto out;
 711
 712         enabled = context_enabled(ce) || context_blocked(ce);
 713
 714         if (!enabled) {
 715                 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
 716                 action[len++] = ce->guc_id.id;
 717                 action[len++] = GUC_CONTEXT_ENABLE;
 718                 set_context_pending_enable(ce);
 719                 intel_context_get(ce);
 720                 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
 721         } else {
 722                 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
 723                 action[len++] = ce->guc_id.id;
 724         }
 725
 726         err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
 727         if (!enabled && !err) {
 728                 trace_intel_context_sched_enable(ce);
 729                 atomic_inc(&guc->outstanding_submission_g2h);
 730                 set_context_enabled(ce);
 731
 732                 /*
 733                  * Without multi-lrc KMD does the submission step (moving the
 734                  * lrc tail) so enabling scheduling is sufficient to submit the
 735                  * context. This isn't the case in multi-lrc submission as the
 736                  * GuC needs to move the tails, hence the need for another H2G
 737                  * to submit a multi-lrc context after enabling scheduling.
 738                  */
 739                 if (intel_context_is_parent(ce)) {
 740                         action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
 741                         err = intel_guc_send_nb(guc, action, len - 1, 0);
 742                 }
 743         } else if (!enabled) {
 744                 clr_context_pending_enable(ce);
 745                 intel_context_put(ce);
 746         }
 747         if (likely(!err))
 748                 trace_i915_request_guc_submit(rq);
 749
 750 out:
 751         spin_unlock(&ce->guc_state.lock);
 752         return err;
 753 }
 754
 755 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 756 {
 757         int ret = __guc_add_request(guc, rq);
 758
 759         if (unlikely(ret == -EBUSY)) {
 760                 guc->stalled_request = rq;
 761                 guc->submission_stall_reason = STALL_ADD_REQUEST;
 762         }
 763
 764         return ret;
 765 }
 766
 767 static inline void guc_set_lrc_tail(struct i915_request *rq)
 768 {
 769         rq->context->lrc_reg_state[CTX_RING_TAIL] =
 770                 intel_ring_set_tail(rq->ring, rq->tail);
 771 }
 772
 773 static inline int rq_prio(const struct i915_request *rq)
 774 {
 775         return rq->sched.attr.priority;
 776 }
 777
 778 static bool is_multi_lrc_rq(struct i915_request *rq)
 779 {
 780         return intel_context_is_parallel(rq->context);
 781 }
 782
 783 static bool can_merge_rq(struct i915_request *rq,
 784                          struct i915_request *last)
 785 {
 786         return request_to_scheduling_context(rq) ==
 787                 request_to_scheduling_context(last);
 788 }
 789
 790 static u32 wq_space_until_wrap(struct intel_context *ce)
 791 {
 792         return (WQ_SIZE - ce->parallel.guc.wqi_tail);
 793 }
 794
 795 static void write_wqi(struct intel_context *ce, u32 wqi_size)
 796 {
 797         BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
 798
 799         /*
 800          * Ensure WQI are visible before updating tail
 801          */
 802         intel_guc_write_barrier(ce_to_guc(ce));
 803
 804         ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
 805                 (WQ_SIZE - 1);
 806         WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
 807 }
 808
 809 static int guc_wq_noop_append(struct intel_context *ce)
 810 {
 811         u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
 812         u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
 813
 814         if (!wqi)
 815                 return -EBUSY;
 816
 817         GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
 818
 819         *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
 820                 FIELD_PREP(WQ_LEN_MASK, len_dw);
 821         ce->parallel.guc.wqi_tail = 0;
 822
 823         return 0;
 824 }
 825
 826 static int __guc_wq_item_append(struct i915_request *rq)
 827 {
             /*
              * Write one WQ_TYPE_MULTI_LRC item for rq's scheduling context into
              * the work queue and publish it with write_wqi().
              *
              * The item is four fixed dwords (header, lrca, guc_id + ring tail,
              * fence_id) followed by one ring-tail dword per child context, which
              * is where the (number_children + 4) size comes from.
              *
              * Returns 0 on success, or -EBUSY when either the NOOP padding or the
              * item itself cannot be placed (get_wq_pointer() returned NULL).
              */
 828         struct intel_context *ce = request_to_scheduling_context(rq);
 829         struct intel_context *child;
 830         unsigned int wqi_size = (ce->parallel.number_children + 4) *
 831                 sizeof(u32);
 832         u32 *wqi;
             /* Length field is the item size in dwords minus the header dword. */
 833         u32 len_dw = (wqi_size / sizeof(u32)) - 1;
 834         int ret;
 835
 836         /* Ensure context is in correct state updating work queue */
 837         GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
 838         GEM_BUG_ON(context_guc_id_invalid(ce));
 839         GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
 840         GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
 841
 842         /* Insert NOOP if this work queue item will wrap the tail pointer. */
 843         if (wqi_size > wq_space_until_wrap(ce)) {
 844                 ret = guc_wq_noop_append(ce);
 845                 if (ret)
 846                         return ret;
 847         }
 848
 849         wqi = get_wq_pointer(ce, wqi_size);
 850         if (!wqi)
 851                 return -EBUSY;
 852
 853         GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
 854
 855         *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
 856                 FIELD_PREP(WQ_LEN_MASK, len_dw);
 857         *wqi++ = ce->lrc.lrca;
             /* Ring tails are stored in units of sizeof(u64), not bytes. */
 858         *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
 859                FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
 860         *wqi++ = 0;     /* fence_id */
 861         for_each_child(ce, child)
 862                 *wqi++ = child->ring->tail / sizeof(u64);
 863
 864         write_wqi(ce, wqi_size);
 865
 866         return 0;
 867 }
 868
 869 static int guc_wq_item_append(struct intel_guc *guc,
 870                               struct i915_request *rq)
 871 {
             /*
              * Append a work queue item for rq unless its scheduling context is
              * banned, in which case nothing is written and 0 is returned.
              *
              * On -EBUSY the request is recorded in guc->stalled_request with
              * reason STALL_MOVE_LRC_TAIL so guc_dequeue_one_context() can resume
              * at this step. Any return value of __guc_wq_item_append() is passed
              * back unchanged.
              */
 872         struct intel_context *ce = request_to_scheduling_context(rq);
 873         int ret = 0;
 874
 875         if (likely(!intel_context_is_banned(ce))) {
 876                 ret = __guc_wq_item_append(rq);
 877
 878                 if (unlikely(ret == -EBUSY)) {
 879                         guc->stalled_request = rq;
 880                         guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
 881                 }
 882         }
 883
 884         return ret;
 885 }
 886
 887 static bool multi_lrc_submit(struct i915_request *rq)
 888 {
             /*
              * Record rq's tail in its ring, then report whether the multi-LRC
              * submission should be sent now. Note the ring tail is updated on
              * every call, whatever the return value.
              */
 889         struct intel_context *ce = request_to_scheduling_context(rq);
 890
 891         intel_ring_set_tail(rq->ring, rq->tail);
 892
 893         /*
 894          * We expect the front end (execbuf IOCTL) to set this flag on the last
 895          * request generated from a multi-BB submission. This indicates to the
 896          * backend (GuC interface) that we should submit this context thus
 897          * submitting all the requests generated in parallel.
 898          */
             /* A banned scheduling context also forces a submit, flag or no flag. */
 899         return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
 900                 intel_context_is_banned(ce);
 901 }
 902
 903 static int guc_dequeue_one_context(struct intel_guc *guc)
 904 {
             /*
              * Pull requests for a single scheduling context off the priority
              * queue and hand the last of them to the GuC. Must be called with
              * sched_engine->lock held.
              *
              * The submission is done in three steps, each with its own label so a
              * stalled attempt can be resumed where it stopped:
              *   register_context - register the context if its guc_id is unmapped
              *   move_lrc_tail    - work queue item (multi-LRC) or LRC tail update
              *   add_request      - guc_add_request()
              *
              * Returns the value of 'submit' (non-zero when a request was handed
              * to guc_add_request() successfully) so the caller can loop, or false
              * when the attempt stalled (-EBUSY, tasklet rescheduled) or hit the
              * deadlk path (tasklet callback cleared and tasklet disabled).
              */
 905         struct i915_sched_engine * const sched_engine = guc->sched_engine;
 906         struct i915_request *last = NULL;
 907         bool submit = false;
 908         struct rb_node *rb;
 909         int ret;
 910
 911         lockdep_assert_held(&sched_engine->lock);
 912
             /* Resume a previously stalled request at the step that returned -EBUSY. */
 913         if (guc->stalled_request) {
 914                 submit = true;
 915                 last = guc->stalled_request;
 916
 917                 switch (guc->submission_stall_reason) {
 918                 case STALL_REGISTER_CONTEXT:
 919                         goto register_context;
 920                 case STALL_MOVE_LRC_TAIL:
 921                         goto move_lrc_tail;
 922                 case STALL_ADD_REQUEST:
 923                         goto add_request;
 924                 default:
                             /*
                              * Unknown reason: after the warning, execution falls
                              * through to the dequeue loop below with 'last' and
                              * 'submit' still set from the stalled request.
                              */
 925                         MISSING_CASE(guc->submission_stall_reason);
 926                 }
 927         }
 928
 929         while ((rb = rb_first_cached(&sched_engine->queue))) {
 930                 struct i915_priolist *p = to_priolist(rb);
 931                 struct i915_request *rq, *rn;
 932
 933                 priolist_for_each_request_consume(rq, rn, p) {
                             /*
                              * A request for a different scheduling context ends
                              * this batch; it stays queued (the priolist is not
                              * erased on this path).
                              */
 934                         if (last && !can_merge_rq(rq, last))
 935                                 goto register_context;
 936
 937                         list_del_init(&rq->sched.link);
 938
 939                         __i915_request_submit(rq);
 940
 941                         trace_i915_request_in(rq, 0);
 942                         last = rq;
 943
 944                         if (is_multi_lrc_rq(rq)) {
 945                                 /*
 946                                  * We need to coalesce all multi-lrc requests in
 947                                  * a relationship into a single H2G. We are
 948                                  * guaranteed that all of these requests will be
 949                                  * submitted sequentially.
 950                                  */
 951                                 if (multi_lrc_submit(rq)) {
 952                                         submit = true;
 953                                         goto register_context;
 954                                 }
 955                         } else {
 956                                 submit = true;
 957                         }
 958                 }
 959
                     /* Every request of this priority level was consumed. */
 960                 rb_erase_cached(&p->node, &sched_engine->queue);
 961                 i915_priolist_free(p);
 962         }
 963
 964 register_context:
 965         if (submit) {
 966                 struct intel_context *ce = request_to_scheduling_context(last);
 967
                     /* Registration is skipped for banned contexts. */
 968                 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
 969                              !intel_context_is_banned(ce))) {
 970                         ret = try_context_registration(ce, false);
 971                         if (unlikely(ret == -EPIPE)) {
 972                                 goto deadlk;
 973                         } else if (ret == -EBUSY) {
 974                                 guc->stalled_request = last;
 975                                 guc->submission_stall_reason =
 976                                         STALL_REGISTER_CONTEXT;
 977                                 goto schedule_tasklet;
 978                         } else if (ret != 0) {
 979                                 GEM_WARN_ON(ret);       /* Unexpected */
 980                                 goto deadlk;
 981                         }
 982                 }
 983
 984 move_lrc_tail:
 985                 if (is_multi_lrc_rq(last)) {
                             /* On -EBUSY guc_wq_item_append() has recorded the stall. */
 986                         ret = guc_wq_item_append(guc, last);
 987                         if (ret == -EBUSY) {
 988                                 goto schedule_tasklet;
 989                         } else if (ret != 0) {
 990                                 GEM_WARN_ON(ret);       /* Unexpected */
 991                                 goto deadlk;
 992                         }
 993                 } else {
 994                         guc_set_lrc_tail(last);
 995                 }
 996
 997 add_request:
                     /*
                      * NOTE(review): the stall state is not set here on -EBUSY;
                      * presumably guc_add_request() records STALL_ADD_REQUEST
                      * itself - confirm against its definition.
                      */
 998                 ret = guc_add_request(guc, last);
 999                 if (unlikely(ret == -EPIPE)) {
1000                         goto deadlk;
1001                 } else if (ret == -EBUSY) {
1002                         goto schedule_tasklet;
1003                 } else if (ret != 0) {
1004                         GEM_WARN_ON(ret);       /* Unexpected */
1005                         goto deadlk;
1006                 }
1007         }
1008
             /* Success, or nothing to submit: clear any recorded stall. */
1009         guc->stalled_request = NULL;
1010         guc->submission_stall_reason = STALL_NONE;
1011         return submit;
1012
1013 deadlk:
             /* Stop the submission tasklet from running again. */
1014         sched_engine->tasklet.callback = NULL;
1015         tasklet_disable_nosync(&sched_engine->tasklet);
1016         return false;
1017
1018 schedule_tasklet:
             /* Retry later; the stalled request and reason stay recorded in guc. */
1019         tasklet_schedule(&sched_engine->tasklet);
1020         return false;
1021 }
1022
1023 static void guc_submission_tasklet(struct tasklet_struct *t)
1024 {
             /*
              * Tasklet body: under sched_engine->lock, keep dequeuing one context
              * at a time for as long as guc_dequeue_one_context() reports that it
              * submitted something, then call i915_sched_engine_reset_on_empty().
              * sched_engine->private_data is passed as the struct intel_guc.
              */
1025         struct i915_sched_engine *sched_engine =
1026                 from_tasklet(sched_engine, t, tasklet);
1027         unsigned long flags;
1028         bool loop;
1029
1030         spin_lock_irqsave(&sched_engine->lock, flags);
1031
1032         do {
1033                 loop = guc_dequeue_one_context(sched_engine->private_data);
1034         } while (loop);
1035
1036         i915_sched_engine_reset_on_empty(sched_engine);
1037
1038         spin_unlock_irqrestore(&sched_engine->lock, flags);
1039 }
1040
1041 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1042 {
             /*
              * Only GT_RENDER_USER_INTERRUPT is acted on: it signals the engine's
              * breadcrumbs. Any other bit set in iir is ignored here.
              */
1043         if (iir & GT_RENDER_USER_INTERRUPT)
1044                 intel_engine_signal_breadcrumbs(engine);
1045 }
1046
1047 static void __guc_context_destroy(struct intel_context *ce);
1048 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1049 static void guc_signal_context_fence(struct intel_context *ce);
1050 static void guc_cancel_context_requests(struct intel_context *ce);
1051 static void guc_blocked_fence_complete(struct intel_context *ce);
     /*
      * Prototypes only: scrub_guc_desc_for_outstanding_g2h() below calls these
      * helpers ahead of their definitions (presumably later in this file).
      */
1052
1053 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1054 {
             /*
              * Walk every context in guc->context_lookup, snapshot its scheduling
              * state flags, reset that state with init_sched_state(), and then
              * perform the cleanup for each flag that was set (fence signalling,
              * reference drops, outstanding-G2H accounting, context destruction).
              *
              * The xarray lock is dropped while an entry is processed and retaken
              * before the iteration continues; interrupts stay disabled throughout
              * because only the final unlock restores the saved flags.
              */
1055         struct intel_context *ce;
1056         unsigned long index, flags;
1057         bool pending_disable, pending_enable, deregister, destroyed, banned;
1058
1059         xa_lock_irqsave(&guc->context_lookup, flags);
1060         xa_for_each(&guc->context_lookup, index, ce) {
1061                 /*
1062                  * Corner case where the ref count on the object is zero but a
1063                  * deregister G2H was lost. In this case we don't touch the ref
1064                  * count and finish the destroy of the context.
1065                  */
1066                 bool do_put = kref_get_unless_zero(&ce->ref);
1067
1068                 xa_unlock(&guc->context_lookup);
1069
1070                 spin_lock(&ce->guc_state.lock);
1071
1072                 /*
1073                  * Once we are at this point submission_disabled() is guaranteed
1074                  * to be visible to all callers who set the below flags (see above
1075                  * flush and flushes in reset_prepare). If submission_disabled()
1076                  * is set, the caller shouldn't set these flags.
1077                  */
1078
                     /* Snapshot the flags before init_sched_state() resets them. */
1079                 destroyed = context_destroyed(ce);
1080                 pending_enable = context_pending_enable(ce);
1081                 pending_disable = context_pending_disable(ce);
1082                 deregister = context_wait_for_deregister_to_register(ce);
1083                 banned = context_banned(ce);
1084                 init_sched_state(ce);
1085
1086                 spin_unlock(&ce->guc_state.lock);
1087
1088                 if (pending_enable || destroyed || deregister) {
1089                         decr_outstanding_submission_g2h(guc);
1090                         if (deregister)
1091                                 guc_signal_context_fence(ce);
1092                         if (destroyed) {
1093                                 intel_gt_pm_put_async(guc_to_gt(guc));
1094                                 release_guc_id(guc, ce);
1095                                 __guc_context_destroy(ce);
1096                         }
1097                         if (pending_enable || deregister)
1098                                 intel_context_put(ce);
1099                 }
1100
1101                 /* Not mutually exclusive with above if statement. */
1102                 if (pending_disable) {
1103                         guc_signal_context_fence(ce);
1104                         if (banned) {
1105                                 guc_cancel_context_requests(ce);
1106                                 intel_engine_signal_breadcrumbs(ce->engine);
1107                         }
1108                         intel_context_sched_disable_unpin(ce);
1109                         decr_outstanding_submission_g2h(guc);
1110
1111                         spin_lock(&ce->guc_state.lock);
1112                         guc_blocked_fence_complete(ce);
1113                         spin_unlock(&ce->guc_state.lock);
1114
1115                         intel_context_put(ce);
1116                 }
1117
                     /* Drop the temporary reference taken at the top, if one was taken. */
1118                 if (do_put)
1119                         intel_context_put(ce);
1120                 xa_lock(&guc->context_lookup);
1121         }
1122         xa_unlock_irqrestore(&guc->context_lookup, flags);
1123 }
1124
1125 /*
1126  * GuC stores busyness stats for each engine at context in/out boundaries. A
1127  * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1128  * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1129  * GuC.
1130  *
1131  * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1132  * is valid (!= ~0) and start is non-zero, the engine is considered to be
1133  * active. For an active engine total busyness = total + (now - start), where
1134  * 'now' is the time at which the busyness is sampled. For inactive engine,
1135  * total busyness = total.
1136  *
1137  * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1138  *
1139  * The start and total values provided by GuC are 32 bits and wrap around in a
1140  * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1141  * increasing ns values, there is a need for this implementation to account for
1142  * overflows and extend the GuC provided values to 64 bits before returning
1143  * busyness to the user. In order to do that, a worker runs periodically at
1144  * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
1145  * 27 seconds for a gt clock frequency of 19.2 MHz).
1146  */
1147
1148 #define WRAP_TIME_CLKS U32_MAX
1149 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
     /*
      * WRAP_TIME_CLKS is the largest value a 32-bit timestamp can hold;
      * POLL_TIME_CLKS is one eighth of it (right shift by 3).
      */
1150
1151 static void
1152 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1153 {
             /*
              * Extend the 32-bit new_start to 64 bits, using the upper half of
              * guc->timestamp.gt_stamp as the starting point for the high word,
              * and store the result in *prev_start. Nothing is written when
              * new_start equals the low 32 bits already held in *prev_start.
              */
1154         u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1155         u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1156
1157         if (new_start == lower_32_bits(*prev_start))
1158                 return;
1159
1160         /*
1161          * When gt is unparked, we update the gt timestamp and start the ping
1162          * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1163          * is unparked, all switched in contexts will have a start time that is
1164          * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1165          *
1166          * If neither gt_stamp nor new_start has rolled over, then the
1167          * gt_stamp_hi does not need to be adjusted, however if one of them has
1168          * rolled over, we need to adjust gt_stamp_hi accordingly.
1169          *
1170          * The below conditions address the cases of new_start rollover and
1171          * gt_stamp_last rollover respectively.
1172          */
             /*
              * Both subtractions below are on u32 values with the smaller operand
              * first, so they wrap modulo 2^32; a small result means the two
              * stamps are close together across the wrap point.
              */
1173         if (new_start < gt_stamp_last &&
1174             (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
1175                 gt_stamp_hi++;
1176
             /* The trailing gt_stamp_hi test keeps the high word from going below 0. */
1177         if (new_start > gt_stamp_last &&
1178             (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
1179                 gt_stamp_hi--;
1180
1181         *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
1182 }
1183
1184 #define record_read(map_, field_) \
1185         iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
     /*
      * record_read() reads member field_ of a struct guc_engine_usage_record
      * located at offset 0 of the iosys_map map_.
      */
1186
1187 /*
1188  * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1189  * we run into a race where the value read is inconsistent. Sometimes the
1190  * inconsistency is in reading the upper MSB bytes of the last_in value when
1191  * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1192  * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1193  * determine validity of these values. Instead we read the values multiple times
1194  * until they are consistent. In test runs, 3 attempts results in consistent
1195  * values. The upper bound is set to 6 attempts and may need to be tuned as per
1196  * any new occurrences.
1197  */
1198 static void __get_engine_usage_record(struct intel_engine_cs *engine,
1199                                       u32 *last_in, u32 *id, u32 *total)
1200 {
1201         struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1202         int i = 0;
1203
             /*
              * Read all three fields, then read them again and compare. If the
              * two passes never agree within 6 attempts the loop simply ends and
              * the outputs hold the values from the last first-pass read; no
              * error is reported to the caller.
              */
1204         do {
1205                 *last_in = record_read(&rec_map, last_switch_in_stamp);
1206                 *id = record_read(&rec_map, current_context_index);
1207                 *total = record_read(&rec_map, total_runtime);
1208
1209                 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1210                     record_read(&rec_map, current_context_index) == *id &&
1211                     record_read(&rec_map, total_runtime) == *total)
1212                         break;
1213         } while (++i < 6);
1214 }
1215
1216 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1217 {
             /*
              * Refresh engine->stats.guc from the engine usage record: the running
              * flag, the 64-bit start stamp (only while running) and the
              * accumulated total. Must be called with guc->timestamp.lock held.
              */
1218         struct intel_engine_guc_stats *stats = &engine->stats.guc;
1219         struct intel_guc *guc = &engine->gt->uc.guc;
1220         u32 last_switch, ctx_id, total;
1221
1222         lockdep_assert_held(&guc->timestamp.lock);
1223
1224         __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1225
             /* Running means: a context id other than ~0 and a non-zero start stamp. */
1226         stats->running = ctx_id != ~0U && last_switch;
1227         if (stats->running)
1228                 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1229
1230         /*
1231          * Instead of adjusting the total for overflow, just add the
1232          * difference from previous sample stats->total_gt_clks
1233          */
             /*
              * Totals of 0 and ~0 are skipped. The (u32) cast makes the delta
              * wrap modulo 2^32, so a wrapped 'total' still yields the right
              * increment.
              */
1234         if (total && total != ~0U) {
1235                 stats->total_gt_clks += (u32)(total - stats->prev_total);
1236                 stats->prev_total = total;
1237         }
1238 }
1239
1240 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1241 {
             /*
              * Read RPM_CONFIG0 while holding a runtime-pm wakeref, extract the
              * CTC shift parameter field and return 3 minus that value.
              * NOTE(review): presumably the result is what ends up in
              * guc->timestamp.shift - the assignment is not in this part of the
              * file, confirm at the call site.
              */
1242         intel_wakeref_t wakeref;
1243         u32 reg, shift;
1244
1245         with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1246                 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1247
1248         shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1249                 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
1250
1251         return 3 - shift;
1252 }
1253
1254 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1255 {
             /*
              * Refresh the 64-bit guc->timestamp.gt_stamp from the hardware
              * counter and store the current ktime in *now. Must be called with
              * guc->timestamp.lock held.
              *
              * Only the low 32 bits of the shifted register value are used; the
              * high 32 bits are maintained in software and incremented whenever
              * the new low word is smaller than the stored one.
              */
1256         struct intel_gt *gt = guc_to_gt(guc);
1257         u32 gt_stamp_lo, gt_stamp_hi;
1258         u64 gpm_ts;
1259
1260         lockdep_assert_held(&guc->timestamp.lock);
1261
1262         gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1263         gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1264                                           MISC_STATUS1) >> guc->timestamp.shift;
1265         gt_stamp_lo = lower_32_bits(gpm_ts);
1266         *now = ktime_get();
1267
             /* Low word went backwards: treat it as one wrap of the 32-bit value. */
1268         if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
1269                 gt_stamp_hi++;
1270
1271         guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1272 }
1273
1274 /*
1275  * Unlike the execlist mode of submission total and active times are in terms of
1276  * gt clocks. The *now parameter is retained to return the cpu time at which the
1277  * busyness was sampled.
1278  */
1279 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1280 {
             /*
              * Return the engine's total busyness converted to nanoseconds, as a
              * ktime_t, and store the sampling time in *now.
              *
              * Fresh values are pulled from the GuC only when no reset is in
              * progress and the gt is already awake; otherwise the result is
              * computed from the copy already held in engine->stats.guc. The
              * whole function runs under guc->timestamp.lock.
              */
1281         struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1282         struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1283         struct intel_gt *gt = engine->gt;
1284         struct intel_guc *guc = &gt->uc.guc;
1285         u64 total, gt_stamp_saved;
1286         unsigned long flags;
1287         u32 reset_count;
1288         bool in_reset;
1289
1290         spin_lock_irqsave(&guc->timestamp.lock, flags);
1291
1292         /*
1293          * If a reset happened, we risk reading partially updated engine
1294          * busyness from GuC, so we just use the driver stored copy of busyness.
1295          * Synchronize with gt reset using reset_count and the
1296          * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1297          * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1298          * usable by checking the flag afterwards.
1299          */
1300         reset_count = i915_reset_count(gpu_error);
1301         in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
1302
             /* May be overwritten by guc_update_pm_timestamp() below. */
1303         *now = ktime_get();
1304
1305         /*
1306          * The active busyness depends on start_gt_clk and gt_stamp.
1307          * gt_stamp is updated by i915 only when gt is awake and the
1308          * start_gt_clk is derived from GuC state. To get a consistent
1309          * view of activity, we query the GuC state only if gt is awake.
1310          */
1311         if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
                     /* Keep copies so the update can be undone if a reset raced with it. */
1312                 stats_saved = *stats;
1313                 gt_stamp_saved = guc->timestamp.gt_stamp;
1314                 /*
1315                  * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1316                  * start_gt_clk' calculation below for active engines.
1317                  */
1318                 guc_update_engine_gt_clks(engine);
1319                 guc_update_pm_timestamp(guc, now);
1320                 intel_gt_pm_put_async(gt);
                     /* Reset count changed while sampling: discard what was just read. */
1321                 if (i915_reset_count(gpu_error) != reset_count) {
1322                         *stats = stats_saved;
1323                         guc->timestamp.gt_stamp = gt_stamp_saved;
1324                 }
1325         }
1326
1327         total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
             /* For a running engine add the time since its context switched in. */
1328         if (stats->running) {
1329                 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1330
1331                 total += intel_gt_clock_interval_to_ns(gt, clk);
1332         }
1333
1334         spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1335
1336         return ns_to_ktime(total);
1337 }
1338
1339 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1340 {
             /*
              * Stop the timestamp worker (waiting for a running instance to
              * finish), take one last sample of the gt timestamp and of every
              * engine's counters, then zero each engine's prev_total.
              */
1341         struct intel_gt *gt = guc_to_gt(guc);
1342         struct intel_engine_cs *engine;
1343         enum intel_engine_id id;
1344         unsigned long flags;
1345         ktime_t unused;
1346
             /* Done before taking the spinlock: the _sync variant may wait. */
1347         cancel_delayed_work_sync(&guc->timestamp.work);
1348
1349         spin_lock_irqsave(&guc->timestamp.lock, flags);
1350
1351         guc_update_pm_timestamp(guc, &unused);
1352         for_each_engine(engine, gt, id) {
1353                 guc_update_engine_gt_clks(engine);
1354                 engine->stats.guc.prev_total = 0;
1355         }
1356
1357         spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1358 }
1359
1360 static void __update_guc_busyness_stats(struct intel_guc *guc)
1361 {
             /*
              * Under guc->timestamp.lock, refresh the gt timestamp and then the
              * busyness counters of every engine on this gt. The ktime returned
              * by guc_update_pm_timestamp() is not used.
              */
1362         struct intel_gt *gt = guc_to_gt(guc);
1363         struct intel_engine_cs *engine;
1364         enum intel_engine_id id;
1365         unsigned long flags;
1366         ktime_t unused;
1367
1368         spin_lock_irqsave(&guc->timestamp.lock, flags);
1369
1370         guc_update_pm_timestamp(guc, &unused);
1371         for_each_engine(engine, gt, id)
1372                 guc_update_engine_gt_clks(engine);
1373
1374         spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1375 }
1376
1377 static void guc_timestamp_ping(struct work_struct *wrk)
1378 {
             /*
              * Delayed-work handler: refresh the busyness stats while holding a
              * runtime-pm wakeref and the gt reset lock, then re-arm itself to
              * run again after guc->timestamp.ping_delay.
              */
1379         struct intel_guc *guc = container_of(wrk, typeof(*guc),
1380                                              timestamp.work.work);
1381         struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1382         struct intel_gt *gt = guc_to_gt(guc);
1383         intel_wakeref_t wakeref;
1384         int srcu, ret;
1385
1386         /*
1387          * Synchronize with gt reset to make sure the worker does not
1388          * corrupt the engine/guc stats.
1389          */
1390         ret = intel_gt_reset_trylock(gt, &srcu);
             /* On failure the work is NOT re-armed from here. */
1391         if (ret)
1392                 return;
1393
1394         with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
1395                 __update_guc_busyness_stats(guc);
1396
1397         intel_gt_reset_unlock(gt, srcu);
1398
1399         mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1400                          guc->timestamp.ping_delay);
1401 }
1402
1403 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1404 {
             /*
              * Send INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF to the GuC with the engine
              * usage offset as its first parameter and 0 as its second. Returns
              * whatever intel_guc_send() returns.
              */
1405         u32 offset = intel_guc_engine_usage_offset(guc);
1406         u32 action[] = {
1407                 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1408                 offset,
1409                 0,
1410         };
1411
1412         return intel_guc_send(guc, action, ARRAY_SIZE(action));
1413 }
1414
1415 static void guc_init_engine_stats(struct intel_guc *guc)
1416 {
             /*
              * Arm the timestamp worker, then ask the GuC to enable usage stats
              * while holding a runtime-pm wakeref. A failure of the GuC action is
              * only logged; it is not returned to the caller and the worker stays
              * armed.
              */
1417         struct intel_gt *gt = guc_to_gt(guc);
1418         intel_wakeref_t wakeref;
1419
1420         mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1421                          guc->timestamp.ping_delay);
1422
1423         with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
1424                 int ret = guc_action_enable_usage_stats(guc);
1425
1426                 if (ret)
1427                         drm_err(&gt->i915->drm,
1428                                 "Failed to enable usage stats: %d!\n", ret);
1429         }
1430 }
1431
1432 void intel_guc_busyness_park(struct intel_gt *gt)
1433 {
             /*
              * Cancel the pending timestamp work (without waiting for a running
              * instance) and take one more busyness sample. Does nothing when GuC
              * submission is not initialized.
              */
1434         struct intel_guc *guc = &gt->uc.guc;
1435
1436         if (!guc_submission_initialized(guc))
1437                 return;
1438
1439         cancel_delayed_work(&guc->timestamp.work);
1440         __update_guc_busyness_stats(guc);
1441 }
1442
1443 void intel_guc_busyness_unpark(struct intel_gt *gt)
1444 {
             /*
              * Refresh the gt timestamp under guc->timestamp.lock and (re)arm the
              * timestamp worker with guc->timestamp.ping_delay. Does nothing when
              * GuC submission is not initialized.
              */
1445         struct intel_guc *guc = &gt->uc.guc;
1446         unsigned long flags;
1447         ktime_t unused;
1448
1449         if (!guc_submission_initialized(guc))
1450                 return;
1451
1452         spin_lock_irqsave(&guc->timestamp.lock, flags);
1453         guc_update_pm_timestamp(guc, &unused);
1454         spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1455         mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1456                          guc->timestamp.ping_delay);
1457 }
1458
1459 static inline bool
1460 submission_disabled(struct intel_guc *guc)
1461 {
             /*
              * Submission counts as disabled when any of these holds: there is no
              * sched_engine, its tasklet is not enabled, or the gt is wedged.
              */
1462         struct i915_sched_engine * const sched_engine = guc->sched_engine;
1463
1464         return unlikely(!sched_engine ||
1465                         !__tasklet_is_enabled(&sched_engine->tasklet) ||
1466                         intel_gt_is_wedged(guc_to_gt(guc)));
1467 }
1468
1469 static void disable_submission(struct intel_guc *guc)
1470 {
             /*
              * If the submission tasklet is currently enabled, disable it and
              * clear its callback. Does nothing when it is already disabled.
              * Unlike submission_disabled(), sched_engine is dereferenced here
              * without a NULL check.
              */
1471         struct i915_sched_engine * const sched_engine = guc->sched_engine;
1472
1473         if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1474                 GEM_BUG_ON(!guc->ct.enabled);
1475                 __tasklet_disable_sync_once(&sched_engine->tasklet);
1476                 sched_engine->tasklet.callback = NULL;
1477         }
1478 }
1479
1480 static void enable_submission(struct intel_guc *guc)
1481 {
             /*
              * Install guc_submission_tasklet as the tasklet callback and, if the
              * tasklet was disabled, enable it and schedule a run. Everything
              * happens under sched_engine->lock. The callback is (re)installed
              * even when the tasklet was already enabled.
              */
1482         struct i915_sched_engine * const sched_engine = guc->sched_engine;
1483         unsigned long flags;
1484
1485         spin_lock_irqsave(&guc->sched_engine->lock, flags);
1486         sched_engine->tasklet.callback = guc_submission_tasklet;
1487         wmb();  /* Make sure callback visible */
1488         if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1489             __tasklet_enable(&sched_engine->tasklet)) {
1490                 GEM_BUG_ON(!guc->ct.enabled);
1491
1492                 /* And kick in case we missed a new request submission. */
1493                 tasklet_hi_schedule(&sched_engine->tasklet);
1494         }
1495         spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1496 }
1497
1498 static void guc_flush_submissions(struct intel_guc *guc)
1499 {
             /*
              * Acquire and immediately release sched_engine->lock. Nothing is done
              * while it is held; taking it simply waits for whoever holds the lock
              * at the moment to leave their critical section.
              */
1500         struct i915_sched_engine * const sched_engine = guc->sched_engine;
1501         unsigned long flags;
1502
1503         spin_lock_irqsave(&sched_engine->lock, flags);
1504         spin_unlock_irqrestore(&sched_engine->lock, flags);
1505 }
1506
1507 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
     /* Prototype only: intel_guc_submission_reset_prepare() below calls this. */
1508
1509 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1510 {
             /*
              * Quiesce GuC submission ahead of a reset. In order: park heartbeats,
              * disable the submission tasklet, disable GuC interrupts, take a final
              * busyness sample, flush the IRQ handler and any in-flight
              * submission, flush destroyed contexts and the CT request worker, and
              * finally scrub per-context state for G2H messages that will not
              * arrive. Returns early when GuC submission is not initialized.
              */
1511         if (unlikely(!guc_submission_initialized(guc))) {
1512                 /* Reset called during driver load? GuC not yet initialised! */
1513                 return;
1514         }
1515
1516         intel_gt_park_heartbeats(guc_to_gt(guc));
1517         disable_submission(guc);
1518         guc->interrupts.disable(guc);
1519         __reset_guc_busyness_stats(guc);
1520
1521         /* Flush IRQ handler */
1522         spin_lock_irq(&guc_to_gt(guc)->irq_lock);
1523         spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
1524
1525         guc_flush_submissions(guc);
1526         guc_flush_destroyed_contexts(guc);
1527         flush_work(&guc->ct.requests.worker);
1528
1529         scrub_guc_desc_for_outstanding_g2h(guc);
1530 }
1531
1532 static struct intel_engine_cs *
1533 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1534 {
             /*
              * Return the engine at zero-based position 'sibling' among the engines
              * selected by ve->mask, in for_each_engine_masked() iteration order,
              * or NULL when the mask holds fewer engines than that.
              */
1535         struct intel_engine_cs *engine;
1536         intel_engine_mask_t tmp, mask = ve->mask;
1537         unsigned int num_siblings = 0;
1538
1539         for_each_engine_masked(engine, ve->gt, mask, tmp)
1540                 if (num_siblings++ == sibling)
1541                         return engine;
1542
1543         return NULL;
1544 }
1545
1546 static inline struct intel_engine_cs *
1547 __context_to_physical_engine(struct intel_context *ce)
1548 {
             /*
              * Return ce->engine as is, unless it is a virtual engine, in which
              * case its first sibling (index 0) is returned instead.
              */
1549         struct intel_engine_cs *engine = ce->engine;
1550
1551         if (intel_engine_is_virtual(engine))
1552                 engine = guc_virtual_get_sibling(engine, 0);
1553
1554         return engine;
1555 }
1556
1557 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1558 {
             /*
              * Rewrite the context's LRC register state so execution resumes from
              * ring offset 'head'. When 'scrub' is true the register state is
              * re-initialised first. Banned contexts are left untouched. The
              * context must be pinned.
              */
1559         struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1560
1561         if (intel_context_is_banned(ce))
1562                 return;
1563
1564         GEM_BUG_ON(!intel_context_is_pinned(ce));
1565
1566         /*
1567          * We want a simple context + ring to execute the breadcrumb update.
1568          * We cannot rely on the context being intact across the GPU hang,
1569          * so clear it and rebuild just what we need for the breadcrumb.
1570          * All pending requests for this context will be zapped, and any
1571          * future request will be after userspace has had the opportunity
1572          * to recreate its own state.
1573          */
1574         if (scrub)
1575                 lrc_init_regs(ce, engine, true);
1576
1577         /* Rerun the request; its payload has been neutered (if guilty). */
1578         lrc_update_regs(ce, engine, head);
1579 }
1580
1581 static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
1582 {
             /*
              * Read the MSG_IDLE register that corresponds to this engine and
              * return its forcewake bits, shifted down to bit 0. Returns 0 for an
              * engine id that has no register in the table below.
              *
              * All four CCS engines use the same register as RCS0 (MSG_IDLE_CS).
              */
1583         static const i915_reg_t _reg[I915_NUM_ENGINES] = {
1584                 [RCS0] = MSG_IDLE_CS,
1585                 [BCS0] = MSG_IDLE_BCS,
1586                 [VCS0] = MSG_IDLE_VCS0,
1587                 [VCS1] = MSG_IDLE_VCS1,
1588                 [VCS2] = MSG_IDLE_VCS2,
1589                 [VCS3] = MSG_IDLE_VCS3,
1590                 [VCS4] = MSG_IDLE_VCS4,
1591                 [VCS5] = MSG_IDLE_VCS5,
1592                 [VCS6] = MSG_IDLE_VCS6,
1593                 [VCS7] = MSG_IDLE_VCS7,
1594                 [VECS0] = MSG_IDLE_VECS0,
1595                 [VECS1] = MSG_IDLE_VECS1,
1596                 [VECS2] = MSG_IDLE_VECS2,
1597                 [VECS3] = MSG_IDLE_VECS3,
1598                 [CCS0] = MSG_IDLE_CS,
1599                 [CCS1] = MSG_IDLE_CS,
1600                 [CCS2] = MSG_IDLE_CS,
1601                 [CCS3] = MSG_IDLE_CS,
1602         };
1603         u32 val;
1604
             /* Entries not listed above are zero-initialised, i.e. .reg == 0. */
1605         if (!_reg[engine->id].reg)
1606                 return 0;
1607
1608         val = intel_uncore_read(engine->uncore, _reg[engine->id]);
1609
             /* The upper 16 bits, shifted down, act as a mask on the lower 16. */
1610         /* bits[29:25] & bits[13:9] >> shift */
1611         return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
1612 }
1613
1614 static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
1615 {
             /*
              * Wait until every bit of fw_mask reads back as set in
              * GEN9_PWRGT_DOMAIN_STATUS, with a 1us delay before and after the
              * wait. A failed wait is only traced; nothing is returned to the
              * caller.
              */
1616         int ret;
1617
1618         /* Ensure GPM receives fw up/down after CS is stopped */
1619         udelay(1);
1620
1621         /* Wait for forcewake request to complete in GPM */
1622         ret =  __intel_wait_for_register_fw(gt->uncore,
1623                                             GEN9_PWRGT_DOMAIN_STATUS,
1624                                             fw_mask, fw_mask, 5000, 0, NULL);
1625
1626         /* Ensure CS receives fw ack from GPM */
1627         udelay(1);
1628
1629         if (ret)
1630                 GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
1631 }
1632
1633 /*
1634  * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
1635  * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
1636  * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the
1637  * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
1638  * are concerned only with the gt reset here, we use a logical OR of pending
1639  * forcewakeups from all reset domains and then wait for them to complete by
1640  * querying PWRGT_DOMAIN_STATUS.
1641  */
1642 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1643 {
1644         u32 fw_pending;
1645
1646         if (GRAPHICS_VER(engine->i915) != 12)
1647                 return;
1648
1649         /*
1650          * Wa_22011802037
1651          * TODO: Occasionally trying to stop the cs times out, but does not
1652          * adversely affect functionality. The timeout is set as a config
1653          * parameter that defaults to 100ms. Assuming that this timeout is
1654          * sufficient for any pending MI_FORCEWAKEs to complete, ignore the
1655          * timeout returned here until it is root caused.
1656          */
1657         intel_engine_stop_cs(engine);
1658
1659         fw_pending = __cs_pending_mi_force_wakes(engine);
1660         if (fw_pending)
1661                 __gpm_wait_for_fw_complete(engine->gt, fw_pending);
1662 }
1663
/*
 * Intentionally empty: performs no work for @engine.
 * NOTE(review): presumably installed as an engine reset callback that has
 * nothing to do under GuC submission -- confirm at the assignment site.
 */
static void guc_reset_nop(struct intel_engine_cs *engine)
{
}
1667
/*
 * Intentionally empty: both @engine and @stalled are ignored.
 * NOTE(review): presumably installed as an engine rewind callback that has
 * nothing to do under GuC submission -- confirm at the assignment site.
 */
static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
{
}
1671
1672 static void
1673 __unwind_incomplete_requests(struct intel_context *ce)
1674 {
1675         struct i915_request *rq, *rn;
1676         struct list_head *pl;
1677         int prio = I915_PRIORITY_INVALID;
1678         struct i915_sched_engine * const sched_engine =
1679                 ce->engine->sched_engine;
1680         unsigned long flags;
1681
1682         spin_lock_irqsave(&sched_engine->lock, flags);
1683         spin_lock(&ce->guc_state.lock);
1684         list_for_each_entry_safe_reverse(rq, rn,
1685                                          &ce->guc_state.requests,
1686                                          sched.link) {
1687                 if (i915_request_completed(rq))
1688                         continue;
1689
1690                 list_del_init(&rq->sched.link);
1691                 __i915_request_unsubmit(rq);
1692
1693                 /* Push the request back into the queue for later resubmission. */
1694                 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1695                 if (rq_prio(rq) != prio) {
1696                         prio = rq_prio(rq);
1697                         pl = i915_sched_lookup_priolist(sched_engine, prio);
1698                 }
1699                 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1700
1701                 list_add(&rq->sched.link, pl);
1702                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1703         }
1704         spin_unlock(&ce->guc_state.lock);
1705         spin_unlock_irqrestore(&sched_engine->lock, flags);
1706 }
1707
/*
 * Reset the driver-side state of @ce and of each of its children.
 *
 * @ce must not be a child context. @stalled is a mask of engines; a context's
 * active request is treated as guilty only if it has started and the
 * context's engine is part of that mask.
 *
 * A reference on @ce is held for the duration of the call. Once every
 * context in the relationship has been processed, the incomplete requests of
 * the parent are pushed back for resubmission.
 */
static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
{
        bool guilty;
        struct i915_request *rq;
        unsigned long flags;
        u32 head;
        int i, number_children = ce->parallel.number_children;
        /* ce is advanced through the children below; remember the parent. */
        struct intel_context *parent = ce;

        GEM_BUG_ON(intel_context_is_child(ce));

        intel_context_get(ce);

        /*
         * GuC will implicitly mark the context as non-schedulable when it sends
         * the reset notification. Make sure our state reflects this change. The
         * context will be marked enabled on resubmission.
         */
        spin_lock_irqsave(&ce->guc_state.lock, flags);
        clr_context_enabled(ce);
        spin_unlock_irqrestore(&ce->guc_state.lock, flags);

        /*
         * For each context in the relationship find the hanging request
         * resetting each context / request as needed
         */
        for (i = 0; i < number_children + 1; ++i) {
                /* Unpinned contexts are skipped entirely. */
                if (!intel_context_is_pinned(ce))
                        goto next_context;

                guilty = false;
                rq = intel_context_find_active_request(ce);
                if (!rq) {
                        /* No active request: replay from the ring tail. */
                        head = ce->ring->tail;
                        goto out_replay;
                }

                /* Only a request that has started can be blamed. */
                if (i915_request_started(rq))
                        guilty = stalled & ce->engine->mask;

                GEM_BUG_ON(i915_active_is_idle(&ce->active));
                head = intel_ring_wrap(ce->ring, rq->head);

                __i915_request_reset(rq, guilty);
out_replay:
                guc_reset_state(ce, head, guilty);
next_context:
                /* Step to the next child, except after the last iteration. */
                if (i != number_children)
                        ce = list_next_entry(ce, parallel.child_link);
        }

        __unwind_incomplete_requests(parent);
        /* Drop the reference taken at the top of the function. */
        intel_context_put(parent);
}
1762
/*
 * Reset every pinned, non-child context known to @guc, then destroy the
 * context lookup table.
 *
 * @stalled is passed unchanged to __guc_reset_context(). Does nothing if
 * GuC submission has not been initialised.
 */
void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
        struct intel_context *ce;
        unsigned long index;
        unsigned long flags;

        if (unlikely(!guc_submission_initialized(guc))) {
                /* Reset called during driver load? GuC not yet initialised! */
                return;
        }

        xa_lock_irqsave(&guc->context_lookup, flags);
        xa_for_each(&guc->context_lookup, index, ce) {
                /* Skip contexts whose refcount has already dropped to zero. */
                if (!kref_get_unless_zero(&ce->ref))
                        continue;

                /*
                 * The xarray lock is released around the per-context work
                 * (interrupts remain disabled) and re-taken before moving on
                 * to the next entry; the reference obtained above is held
                 * meanwhile.
                 */
                xa_unlock(&guc->context_lookup);

                if (intel_context_is_pinned(ce) &&
                    !intel_context_is_child(ce))
                        __guc_reset_context(ce, stalled);

                intel_context_put(ce);

                xa_lock(&guc->context_lookup);
        }
        xa_unlock_irqrestore(&guc->context_lookup, flags);

        /* GuC is blown away, drop all references to contexts */
        xa_destroy(&guc->context_lookup);
}
1794
1795 static void guc_cancel_context_requests(struct intel_context *ce)
1796 {
1797         struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1798         struct i915_request *rq;
1799         unsigned long flags;
1800
1801         /* Mark all executing requests as skipped. */
1802         spin_lock_irqsave(&sched_engine->lock, flags);
1803         spin_lock(&ce->guc_state.lock);
1804         list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1805                 i915_request_put(i915_request_mark_eio(rq));
1806         spin_unlock(&ce->guc_state.lock);
1807         spin_unlock_irqrestore(&sched_engine->lock, flags);
1808 }
1809
/*
 * Drain the priority queue of @sched_engine: every queued request is
 * unlinked, submitted via __i915_request_submit() and marked with -EIO, and
 * each emptied priority list is erased from the tree and freed. The queue is
 * left empty with queue_priority_hint reset to INT_MIN.
 *
 * A NULL @sched_engine is tolerated and results in no work.
 */
static void
guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
{
        struct i915_request *rq, *rn;
        struct rb_node *rb;
        unsigned long flags;

        /* Can be called during boot if GuC fails to load */
        if (!sched_engine)
                return;

        /*
         * Before we call engine->cancel_requests(), we should have exclusive
         * access to the submission state. This is arranged for us by the
         * caller disabling the interrupt generation, the tasklet and other
         * threads that may then access the same state, giving us a free hand
         * to reset state. However, we still need to let lockdep be aware that
         * we know this state may be accessed in hardirq context, so we
         * disable the irq around this manipulation and we want to keep
         * the spinlock focused on its duties and not accidentally conflate
         * coverage to the submission's irq state. (Similarly, although we
         * shouldn't need to disable irq around the manipulation of the
         * submission's irq state, we also wish to remind ourselves that
         * it is irq state.)
         */
        spin_lock_irqsave(&sched_engine->lock, flags);

        /* Flush the queued requests to the timeline list (for retiring). */
        while ((rb = rb_first_cached(&sched_engine->queue))) {
                struct i915_priolist *p = to_priolist(rb);

                priolist_for_each_request_consume(rq, rn, p) {
                        list_del_init(&rq->sched.link);

                        __i915_request_submit(rq);

                        i915_request_put(i915_request_mark_eio(rq));
                }

                /* This priority level is now empty: remove and free it. */
                rb_erase_cached(&p->node, &sched_engine->queue);
                i915_priolist_free(p);
        }

        /* Remaining _unready_ requests will be nop'ed when submitted */

        sched_engine->queue_priority_hint = INT_MIN;
        sched_engine->queue = RB_ROOT_CACHED;

        spin_unlock_irqrestore(&sched_engine->lock, flags);
}
1860
/*
 * Cancel all requests tracked by @guc: first those already attached to each
 * pinned, non-child context, then those still sitting in the GuC
 * sched_engine queue. The context lookup table is destroyed afterwards.
 */
void intel_guc_submission_cancel_requests(struct intel_guc *guc)
{
        struct intel_context *ce;
        unsigned long index;
        unsigned long flags;

        xa_lock_irqsave(&guc->context_lookup, flags);
        xa_for_each(&guc->context_lookup, index, ce) {
                /* Skip contexts whose refcount has already dropped to zero. */
                if (!kref_get_unless_zero(&ce->ref))
                        continue;

                /*
                 * The xarray lock is released around the per-context work
                 * (interrupts remain disabled) and re-taken before moving on
                 * to the next entry; the reference obtained above is held
                 * meanwhile.
                 */
                xa_unlock(&guc->context_lookup);

                if (intel_context_is_pinned(ce) &&
                    !intel_context_is_child(ce))
                        guc_cancel_context_requests(ce);

                intel_context_put(ce);

                xa_lock(&guc->context_lookup);
        }
        xa_unlock_irqrestore(&guc->context_lookup, flags);

        guc_cancel_sched_engine_requests(guc->sched_engine);

        /* GuC is blown away, drop all references to contexts */
        xa_destroy(&guc->context_lookup);
}
1889
1890 void intel_guc_submission_reset_finish(struct intel_guc *guc)
1891 {
1892         /* Reset called during driver load or during wedge? */
1893         if (unlikely(!guc_submission_initialized(guc) ||
1894                      intel_gt_is_wedged(guc_to_gt(guc)))) {
1895                 return;
1896         }
1897
1898         /*
1899          * Technically possible for either of these values to be non-zero here,
1900          * but very unlikely + harmless. Regardless let's add a warn so we can
1901          * see in CI if this happens frequently / a precursor to taking down the
1902          * machine.
1903          */
1904         GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1905         atomic_set(&guc->outstanding_submission_g2h, 0);
1906
1907         intel_guc_global_policies_update(guc);
1908         enable_submission(guc);
1909         intel_gt_unpark_heartbeats(guc_to_gt(guc));
1910 }
1911
1912 static void destroyed_worker_func(struct work_struct *w);
1913 static void reset_fail_worker_func(struct work_struct *w);
1914
1915 /*
1916  * Set up the memory resources to be shared with the GuC (via the GGTT)
1917  * at firmware loading time.
1918  */
1919 int intel_guc_submission_init(struct intel_guc *guc)
1920 {
1921         struct intel_gt *gt = guc_to_gt(guc);
1922         int ret;
1923
1924         if (guc->submission_initialized)
1925                 return 0;
1926
1927         if (guc->fw.major_ver_found < 70) {
1928                 ret = guc_lrc_desc_pool_create_v69(guc);
1929                 if (ret)
1930                         return ret;
1931         }
1932
1933         guc->submission_state.guc_ids_bitmap =
1934                 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1935         if (!guc->submission_state.guc_ids_bitmap) {
1936                 ret = -ENOMEM;
1937                 goto destroy_pool;
1938         }
1939
1940         guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1941         guc->timestamp.shift = gpm_timestamp_shift(gt);
1942         guc->submission_initialized = true;
1943
1944         return 0;
1945
1946 destroy_pool:
1947         guc_lrc_desc_pool_destroy_v69(guc);
1948
1949         return ret;
1950 }
1951
/*
 * Tear down what intel_guc_submission_init() set up. Does nothing unless
 * guc->submission_initialized is set, and clears that flag when done.
 */
void intel_guc_submission_fini(struct intel_guc *guc)
{
        if (!guc->submission_initialized)
                return;

        /* Flush pending destroyed contexts before freeing anything. */
        guc_flush_destroyed_contexts(guc);
        guc_lrc_desc_pool_destroy_v69(guc);
        i915_sched_engine_put(guc->sched_engine);
        bitmap_free(guc->submission_state.guc_ids_bitmap);
        guc->submission_initialized = false;
}
1963
1964 static inline void queue_request(struct i915_sched_engine *sched_engine,
1965                                  struct i915_request *rq,
1966                                  int prio)
1967 {
1968         GEM_BUG_ON(!list_empty(&rq->sched.link));
1969         list_add_tail(&rq->sched.link,
1970                       i915_sched_lookup_priolist(sched_engine, prio));
1971         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1972         tasklet_hi_schedule(&sched_engine->tasklet);
1973 }
1974
1975 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1976                                      struct i915_request *rq)
1977 {
1978         int ret = 0;
1979
1980         __i915_request_submit(rq);
1981
1982         trace_i915_request_in(rq, 0);
1983
1984         if (is_multi_lrc_rq(rq)) {
1985                 if (multi_lrc_submit(rq)) {
1986                         ret = guc_wq_item_append(guc, rq);
1987                         if (!ret)
1988                                 ret = guc_add_request(guc, rq);
1989                 }
1990         } else {
1991                 guc_set_lrc_tail(rq);
1992                 ret = guc_add_request(guc, rq);
1993         }
1994
1995         if (unlikely(ret == -EPIPE))
1996                 disable_submission(guc);
1997
1998         return ret;
1999 }
2000
2001 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
2002 {
2003         struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2004         struct intel_context *ce = request_to_scheduling_context(rq);
2005
2006         return submission_disabled(guc) || guc->stalled_request ||
2007                 !i915_sched_engine_is_empty(sched_engine) ||
2008                 !ctx_id_mapped(guc, ce->guc_id.id);
2009 }
2010
2011 static void guc_submit_request(struct i915_request *rq)
2012 {
2013         struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2014         struct intel_guc *guc = &rq->engine->gt->uc.guc;
2015         unsigned long flags;
2016
2017         /* Will be called from irq-context when using foreign fences. */
2018         spin_lock_irqsave(&sched_engine->lock, flags);
2019
2020         if (need_tasklet(guc, rq))
2021                 queue_request(sched_engine, rq, rq_prio(rq));
2022         else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
2023                 tasklet_hi_schedule(&sched_engine->tasklet);
2024
2025         spin_unlock_irqrestore(&sched_engine->lock, flags);
2026 }
2027
2028 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
2029 {
2030         int ret;
2031
2032         GEM_BUG_ON(intel_context_is_child(ce));
2033
2034         if (intel_context_is_parent(ce))
2035                 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
2036                                               NUMBER_MULTI_LRC_GUC_ID(guc),
2037                                               order_base_2(ce->parallel.number_children
2038                                                            + 1));
2039         else
2040                 ret = ida_simple_get(&guc->submission_state.guc_ids,
2041                                      NUMBER_MULTI_LRC_GUC_ID(guc),
2042                                      guc->submission_state.num_guc_ids,
2043                                      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
2044                                      __GFP_NOWARN);
2045         if (unlikely(ret < 0))
2046                 return ret;
2047
2048         ce->guc_id.id = ret;
2049         return 0;
2050 }
2051
2052 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2053 {
2054         GEM_BUG_ON(intel_context_is_child(ce));
2055
2056         if (!context_guc_id_invalid(ce)) {
2057                 if (intel_context_is_parent(ce))
2058                         bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2059                                               ce->guc_id.id,
2060                                               order_base_2(ce->parallel.number_children
2061                                                            + 1));
2062                 else
2063                         ida_simple_remove(&guc->submission_state.guc_ids,
2064                                           ce->guc_id.id);
2065                 clr_ctx_id_mapping(guc, ce->guc_id.id);
2066                 set_context_guc_id_invalid(ce);
2067         }
2068         if (!list_empty(&ce->guc_id.link))
2069                 list_del_init(&ce->guc_id.link);
2070 }
2071
/*
 * Locked wrapper around __release_guc_id(): takes
 * guc->submission_state.lock with interrupts disabled for the duration of
 * the call.
 */
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
        unsigned long flags;

        spin_lock_irqsave(&guc->submission_state.lock, flags);
        __release_guc_id(guc, ce);
        spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
2080
2081 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2082 {
2083         struct intel_context *cn;
2084
2085         lockdep_assert_held(&guc->submission_state.lock);
2086         GEM_BUG_ON(intel_context_is_child(ce));
2087         GEM_BUG_ON(intel_context_is_parent(ce));
2088
2089         if (!list_empty(&guc->submission_state.guc_id_list)) {
2090                 cn = list_first_entry(&guc->submission_state.guc_id_list,
2091                                       struct intel_context,
2092                                       guc_id.link);
2093
2094                 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2095                 GEM_BUG_ON(context_guc_id_invalid(cn));
2096                 GEM_BUG_ON(intel_context_is_child(cn));
2097                 GEM_BUG_ON(intel_context_is_parent(cn));
2098
2099                 list_del_init(&cn->guc_id.link);
2100                 ce->guc_id.id = cn->guc_id.id;
2101
2102                 spin_lock(&cn->guc_state.lock);
2103                 clr_context_registered(cn);
2104                 spin_unlock(&cn->guc_state.lock);
2105
2106                 set_context_guc_id_invalid(cn);
2107
2108 #ifdef CONFIG_DRM_I915_SELFTEST
2109                 guc->number_guc_id_stolen++;
2110 #endif
2111
2112                 return 0;
2113         } else {
2114                 return -EAGAIN;
2115         }
2116 }
2117
2118 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2119 {
2120         int ret;
2121
2122         lockdep_assert_held(&guc->submission_state.lock);
2123         GEM_BUG_ON(intel_context_is_child(ce));
2124
2125         ret = new_guc_id(guc, ce);
2126         if (unlikely(ret < 0)) {
2127                 if (intel_context_is_parent(ce))
2128                         return -ENOSPC;
2129
2130                 ret = steal_guc_id(guc, ce);
2131                 if (ret < 0)
2132                         return ret;
2133         }
2134
2135         if (intel_context_is_parent(ce)) {
2136                 struct intel_context *child;
2137                 int i = 1;
2138
2139                 for_each_child(ce, child)
2140                         child->guc_id.id = ce->guc_id.id + i++;
2141         }
2142
2143         return 0;
2144 }
2145
/* Total number of attempts pin_guc_id() makes before giving up on -EAGAIN. */
#define PIN_GUC_ID_TRIES        4
/*
 * Take a reference on the guc_id of @ce, assigning one first if the context
 * does not currently hold a valid id.
 *
 * Returns 1 if a guc_id was newly assigned, 0 if the context already had a
 * valid one, or a negative error code. On error no reference is taken.
 * While the error is -EAGAIN the attempt is repeated, up to
 * PIN_GUC_ID_TRIES attempts in total.
 */
static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
        int ret = 0;
        unsigned long flags, tries = PIN_GUC_ID_TRIES;

        GEM_BUG_ON(atomic_read(&ce->guc_id.ref));

try_again:
        spin_lock_irqsave(&guc->submission_state.lock, flags);

        might_lock(&ce->guc_state.lock);

        if (context_guc_id_invalid(ce)) {
                ret = assign_guc_id(guc, ce);
                if (ret)
                        goto out_unlock;
                ret = 1;        /* Indicates newly assigned guc_id */
        }
        /* The id is in use again: take it off any list it is linked on. */
        if (!list_empty(&ce->guc_id.link))
                list_del_init(&ce->guc_id.link);
        atomic_inc(&ce->guc_id.ref);

out_unlock:
        spin_unlock_irqrestore(&guc->submission_state.lock, flags);

        /*
         * -EAGAIN indicates no guc_id are available, let's retire any
         * outstanding requests to see if that frees up a guc_id. If the first
         * retire didn't help, insert a sleep with the timeslice duration before
         * attempting to retire more requests. Double the sleep period each
         * subsequent pass before finally giving up. The sleep period has max of
         * 100ms and minimum of 1ms.
         */
        if (ret == -EAGAIN && --tries) {
                /* No sleep before the first retry, only before later ones. */
                if (PIN_GUC_ID_TRIES - tries > 1) {
                        unsigned int timeslice_shifted =
                                ce->engine->props.timeslice_duration_ms <<
                                (PIN_GUC_ID_TRIES - tries - 2);
                        unsigned int max = min_t(unsigned int, 100,
                                                 timeslice_shifted);

                        msleep(max_t(unsigned int, max, 1));
                }
                intel_gt_retire_requests(guc_to_gt(guc));
                goto try_again;
        }

        return ret;
}
2196
2197 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2198 {
2199         unsigned long flags;
2200
2201         GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2202         GEM_BUG_ON(intel_context_is_child(ce));
2203
2204         if (unlikely(context_guc_id_invalid(ce) ||
2205                      intel_context_is_parent(ce)))
2206                 return;
2207
2208         spin_lock_irqsave(&guc->submission_state.lock, flags);
2209         if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2210             !atomic_read(&ce->guc_id.ref))
2211                 list_add_tail(&ce->guc_id.link,
2212                               &guc->submission_state.guc_id_list);
2213         spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2214 }
2215
2216 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2217                                                struct intel_context *ce,
2218                                                u32 guc_id,
2219                                                u32 offset,
2220                                                bool loop)
2221 {
2222         struct intel_context *child;
2223         u32 action[4 + MAX_ENGINE_INSTANCE];
2224         int len = 0;
2225
2226         GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2227
2228         action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2229         action[len++] = guc_id;
2230         action[len++] = ce->parallel.number_children + 1;
2231         action[len++] = offset;
2232         for_each_child(ce, child) {
2233                 offset += sizeof(struct guc_lrc_desc_v69);
2234                 action[len++] = offset;
2235         }
2236
2237         return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2238 }
2239
2240 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2241                                                struct intel_context *ce,
2242                                                struct guc_ctxt_registration_info *info,
2243                                                bool loop)
2244 {
2245         struct intel_context *child;
2246         u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2247         int len = 0;
2248         u32 next_id;
2249
2250         GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2251
2252         action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2253         action[len++] = info->flags;
2254         action[len++] = info->context_idx;
2255         action[len++] = info->engine_class;
2256         action[len++] = info->engine_submit_mask;
2257         action[len++] = info->wq_desc_lo;
2258         action[len++] = info->wq_desc_hi;
2259         action[len++] = info->wq_base_lo;
2260         action[len++] = info->wq_base_hi;
2261         action[len++] = info->wq_size;
2262         action[len++] = ce->parallel.number_children + 1;
2263         action[len++] = info->hwlrca_lo;
2264         action[len++] = info->hwlrca_hi;
2265
2266         next_id = info->context_idx + 1;
2267         for_each_child(ce, child) {
2268                 GEM_BUG_ON(next_id++ != child->guc_id.id);
2269
2270                 /*
2271                  * NB: GuC interface supports 64 bit LRCA even though i915/HW
2272                  * only supports 32 bit currently.
2273                  */
2274                 action[len++] = lower_32_bits(child->lrc.lrca);
2275                 action[len++] = upper_32_bits(child->lrc.lrca);
2276         }
2277
2278         GEM_BUG_ON(len > ARRAY_SIZE(action));
2279
2280         return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2281 }
2282
2283 static int __guc_action_register_context_v69(struct intel_guc *guc,
2284                                              u32 guc_id,
2285                                              u32 offset,
2286                                              bool loop)
2287 {
2288         u32 action[] = {
2289                 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2290                 guc_id,
2291                 offset,
2292         };
2293
2294         return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2295                                              0, loop);
2296 }
2297
2298 static int __guc_action_register_context_v70(struct intel_guc *guc,
2299                                              struct guc_ctxt_registration_info *info,
2300                                              bool loop)
2301 {
2302         u32 action[] = {
2303                 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2304                 info->flags,
2305                 info->context_idx,
2306                 info->engine_class,
2307                 info->engine_submit_mask,
2308                 info->wq_desc_lo,
2309                 info->wq_desc_hi,
2310                 info->wq_base_lo,
2311                 info->wq_base_hi,
2312                 info->wq_size,
2313                 info->hwlrca_lo,
2314                 info->hwlrca_hi,
2315         };
2316
2317         return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2318                                              0, loop);
2319 }
2320
2321 static void prepare_context_registration_info_v69(struct intel_context *ce);
2322 static void prepare_context_registration_info_v70(struct intel_context *ce,
2323                                                   struct guc_ctxt_registration_info *info);
2324
2325 static int
2326 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2327 {
2328         u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2329                 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2330
2331         prepare_context_registration_info_v69(ce);
2332
2333         if (intel_context_is_parent(ce))
2334                 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2335                                                            offset, loop);
2336         else
2337                 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2338                                                          offset, loop);
2339 }
2340
2341 static int
2342 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2343 {
2344         struct guc_ctxt_registration_info info;
2345
2346         prepare_context_registration_info_v70(ce, &info);
2347
2348         if (intel_context_is_parent(ce))
2349                 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2350         else
2351                 return __guc_action_register_context_v70(guc, &info, loop);
2352 }
2353
2354 static int register_context(struct intel_context *ce, bool loop)
2355 {
2356         struct intel_guc *guc = ce_to_guc(ce);
2357         int ret;
2358
2359         GEM_BUG_ON(intel_context_is_child(ce));
2360         trace_intel_context_register(ce);
2361
2362         if (guc->fw.major_ver_found >= 70)
2363                 ret = register_context_v70(guc, ce, loop);
2364         else
2365                 ret = register_context_v69(guc, ce, loop);
2366
2367         if (likely(!ret)) {
2368                 unsigned long flags;
2369
2370                 spin_lock_irqsave(&ce->guc_state.lock, flags);
2371                 set_context_registered(ce);
2372                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2373
2374                 if (guc->fw.major_ver_found >= 70)
2375                         guc_context_policy_init_v70(ce, loop);
2376         }
2377
2378         return ret;
2379 }
2380
2381 static int __guc_action_deregister_context(struct intel_guc *guc,
2382                                            u32 guc_id)
2383 {
2384         u32 action[] = {
2385                 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2386                 guc_id,
2387         };
2388
2389         return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2390                                              G2H_LEN_DW_DEREGISTER_CONTEXT,
2391                                              true);
2392 }
2393
/*
 * Deregister @guc_id with the GuC that @ce belongs to, emitting the
 * deregister tracepoint for @ce first. @ce must not be a child context.
 * Returns the result of sending the deregister action.
 */
static int deregister_context(struct intel_context *ce, u32 guc_id)
{
        struct intel_guc *guc = ce_to_guc(ce);

        GEM_BUG_ON(intel_context_is_child(ce));
        trace_intel_context_deregister(ce);

        return __guc_action_deregister_context(guc, guc_id);
}
2403
2404 static inline void clear_children_join_go_memory(struct intel_context *ce)
2405 {
2406         struct parent_scratch *ps = __get_parent_scratch(ce);
2407         int i;
2408
2409         ps->go.semaphore = 0;
2410         for (i = 0; i < ce->parallel.number_children + 1; ++i)
2411                 ps->join[i].semaphore = 0;
2412 }
2413
2414 static inline u32 get_children_go_value(struct intel_context *ce)
2415 {
2416         return __get_parent_scratch(ce)->go.semaphore;
2417 }
2418
2419 static inline u32 get_children_join_value(struct intel_context *ce,
2420                                           u8 child_index)
2421 {
2422         return __get_parent_scratch(ce)->join[child_index].semaphore;
2423 }
2424
/*
 * Builder for an "update context policies" H2G message: the message itself
 * plus a count of the KLV entries filled in so far.
 */
struct context_policy {
        /* Number of entries of h2g.klv[] currently in use. */
        u32 count;
        /* The H2G message being assembled (header followed by KLVs). */
        struct guc_update_context_policy h2g;
};
2429
2430 static u32 __guc_context_policy_action_size(struct context_policy *policy)
2431 {
2432         size_t bytes = sizeof(policy->h2g.header) +
2433                        (sizeof(policy->h2g.klv[0]) * policy->count);
2434
2435         return bytes / sizeof(u32);
2436 }
2437
2438 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2439 {
2440         policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2441         policy->h2g.header.ctx_id = guc_id;
2442         policy->count = 0;
2443 }
2444
2445 #define MAKE_CONTEXT_POLICY_ADD(func, id) \
2446 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2447 { \
2448         GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
2449         policy->h2g.klv[policy->count].kl = \
2450                 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2451                 FIELD_PREP(GUC_KLV_0_LEN, 1); \
2452         policy->h2g.klv[policy->count].value = data; \
2453         policy->count++; \
2454 }
2455
2456 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2457 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2458 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2459 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2460
2461 #undef MAKE_CONTEXT_POLICY_ADD
2462
2463 static int __guc_context_set_context_policies(struct intel_guc *guc,
2464                                               struct context_policy *policy,
2465                                               bool loop)
2466 {
2467         return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2468                                         __guc_context_policy_action_size(policy),
2469                                         0, loop);
2470 }
2471
2472 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
2473 {
2474         struct intel_engine_cs *engine = ce->engine;
2475         struct intel_guc *guc = &engine->gt->uc.guc;
2476         struct context_policy policy;
2477         u32 execution_quantum;
2478         u32 preemption_timeout;
2479         bool missing = false;
2480         unsigned long flags;
2481         int ret;
2482
2483         /* NB: For both of these, zero means disabled. */
2484         execution_quantum = engine->props.timeslice_duration_ms * 1000;
2485         preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2486
2487         __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2488
2489         __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2490         __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2491         __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2492
2493         if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2494                 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2495
2496         ret = __guc_context_set_context_policies(guc, &policy, loop);
2497         missing = ret != 0;
2498
2499         if (!missing && intel_context_is_parent(ce)) {
2500                 struct intel_context *child;
2501
2502                 for_each_child(ce, child) {
2503                         __guc_context_policy_start_klv(&policy, child->guc_id.id);
2504
2505                         if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2506                                 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2507
2508                         child->guc_state.prio = ce->guc_state.prio;
2509                         __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2510                         __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2511                         __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2512
2513                         ret = __guc_context_set_context_policies(guc, &policy, loop);
2514                         if (ret) {
2515                                 missing = true;
2516                                 break;
2517                         }
2518                 }
2519         }
2520
2521         spin_lock_irqsave(&ce->guc_state.lock, flags);
2522         if (missing)
2523                 set_context_policy_required(ce);
2524         else
2525                 clr_context_policy_required(ce);
2526         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2527
2528         return ret;
2529 }
2530
2531 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2532                                         struct guc_lrc_desc_v69 *desc)
2533 {
2534         desc->policy_flags = 0;
2535
2536         if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2537                 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2538
2539         /* NB: For both of these, zero means disabled. */
2540         desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2541         desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2542 }
2543
2544 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2545 {
2546         /*
2547          * this matches the mapping we do in map_i915_prio_to_guc_prio()
2548          * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2549          */
2550         switch (prio) {
2551         default:
2552                 MISSING_CASE(prio);
2553                 fallthrough;
2554         case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2555                 return GEN12_CTX_PRIORITY_NORMAL;
2556         case GUC_CLIENT_PRIORITY_NORMAL:
2557                 return GEN12_CTX_PRIORITY_LOW;
2558         case GUC_CLIENT_PRIORITY_HIGH:
2559         case GUC_CLIENT_PRIORITY_KMD_HIGH:
2560                 return GEN12_CTX_PRIORITY_HIGH;
2561         }
2562 }
2563
2564 static void prepare_context_registration_info_v69(struct intel_context *ce)
2565 {
2566         struct intel_engine_cs *engine = ce->engine;
2567         struct intel_guc *guc = &engine->gt->uc.guc;
2568         u32 ctx_id = ce->guc_id.id;
2569         struct guc_lrc_desc_v69 *desc;
2570         struct intel_context *child;
2571
2572         GEM_BUG_ON(!engine->mask);
2573
2574         /*
2575          * Ensure LRC + CT vmas are is same region as write barrier is done
2576          * based on CT vma region.
2577          */
2578         GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2579                    i915_gem_object_is_lmem(ce->ring->vma->obj));
2580
2581         desc = __get_lrc_desc_v69(guc, ctx_id);
2582         desc->engine_class = engine_class_to_guc_class(engine->class);
2583         desc->engine_submit_mask = engine->logical_mask;
2584         desc->hw_context_desc = ce->lrc.lrca;
2585         desc->priority = ce->guc_state.prio;
2586         desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2587         guc_context_policy_init_v69(engine, desc);
2588
2589         /*
2590          * If context is a parent, we need to register a process descriptor
2591          * describing a work queue and register all child contexts.
2592          */
2593         if (intel_context_is_parent(ce)) {
2594                 struct guc_process_desc_v69 *pdesc;
2595
2596                 ce->parallel.guc.wqi_tail = 0;
2597                 ce->parallel.guc.wqi_head = 0;
2598
2599                 desc->process_desc = i915_ggtt_offset(ce->state) +
2600                         __get_parent_scratch_offset(ce);
2601                 desc->wq_addr = i915_ggtt_offset(ce->state) +
2602                         __get_wq_offset(ce);
2603                 desc->wq_size = WQ_SIZE;
2604
2605                 pdesc = __get_process_desc_v69(ce);
2606                 memset(pdesc, 0, sizeof(*(pdesc)));
2607                 pdesc->stage_id = ce->guc_id.id;
2608                 pdesc->wq_base_addr = desc->wq_addr;
2609                 pdesc->wq_size_bytes = desc->wq_size;
2610                 pdesc->wq_status = WQ_STATUS_ACTIVE;
2611
2612                 ce->parallel.guc.wq_head = &pdesc->head;
2613                 ce->parallel.guc.wq_tail = &pdesc->tail;
2614                 ce->parallel.guc.wq_status = &pdesc->wq_status;
2615
2616                 for_each_child(ce, child) {
2617                         desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2618
2619                         desc->engine_class =
2620                                 engine_class_to_guc_class(engine->class);
2621                         desc->hw_context_desc = child->lrc.lrca;
2622                         desc->priority = ce->guc_state.prio;
2623                         desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2624                         guc_context_policy_init_v69(engine, desc);
2625                 }
2626
2627                 clear_children_join_go_memory(ce);
2628         }
2629 }
2630
2631 static void prepare_context_registration_info_v70(struct intel_context *ce,
2632                                                   struct guc_ctxt_registration_info *info)
2633 {
2634         struct intel_engine_cs *engine = ce->engine;
2635         struct intel_guc *guc = &engine->gt->uc.guc;
2636         u32 ctx_id = ce->guc_id.id;
2637
2638         GEM_BUG_ON(!engine->mask);
2639
2640         /*
2641          * Ensure LRC + CT vmas are is same region as write barrier is done
2642          * based on CT vma region.
2643          */
2644         GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2645                    i915_gem_object_is_lmem(ce->ring->vma->obj));
2646
2647         memset(info, 0, sizeof(*info));
2648         info->context_idx = ctx_id;
2649         info->engine_class = engine_class_to_guc_class(engine->class);
2650         info->engine_submit_mask = engine->logical_mask;
2651         /*
2652          * NB: GuC interface supports 64 bit LRCA even though i915/HW
2653          * only supports 32 bit currently.
2654          */
2655         info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2656         info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2657         if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2658                 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2659         info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2660
2661         /*
2662          * If context is a parent, we need to register a process descriptor
2663          * describing a work queue and register all child contexts.
2664          */
2665         if (intel_context_is_parent(ce)) {
2666                 struct guc_sched_wq_desc *wq_desc;
2667                 u64 wq_desc_offset, wq_base_offset;
2668
2669                 ce->parallel.guc.wqi_tail = 0;
2670                 ce->parallel.guc.wqi_head = 0;
2671
2672                 wq_desc_offset = i915_ggtt_offset(ce->state) +
2673                                  __get_parent_scratch_offset(ce);
2674                 wq_base_offset = i915_ggtt_offset(ce->state) +
2675                                  __get_wq_offset(ce);
2676                 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2677                 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2678                 info->wq_base_lo = lower_32_bits(wq_base_offset);
2679                 info->wq_base_hi = upper_32_bits(wq_base_offset);
2680                 info->wq_size = WQ_SIZE;
2681
2682                 wq_desc = __get_wq_desc_v70(ce);
2683                 memset(wq_desc, 0, sizeof(*wq_desc));
2684                 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2685
2686                 ce->parallel.guc.wq_head = &wq_desc->head;
2687                 ce->parallel.guc.wq_tail = &wq_desc->tail;
2688                 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2689
2690                 clear_children_join_go_memory(ce);
2691         }
2692 }
2693
2694 static int try_context_registration(struct intel_context *ce, bool loop)
2695 {
2696         struct intel_engine_cs *engine = ce->engine;
2697         struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2698         struct intel_guc *guc = &engine->gt->uc.guc;
2699         intel_wakeref_t wakeref;
2700         u32 ctx_id = ce->guc_id.id;
2701         bool context_registered;
2702         int ret = 0;
2703
2704         GEM_BUG_ON(!sched_state_is_init(ce));
2705
2706         context_registered = ctx_id_mapped(guc, ctx_id);
2707
2708         clr_ctx_id_mapping(guc, ctx_id);
2709         set_ctx_id_mapping(guc, ctx_id, ce);
2710
2711         /*
2712          * The context_lookup xarray is used to determine if the hardware
2713          * context is currently registered. There are two cases in which it
2714          * could be registered either the guc_id has been stolen from another
2715          * context or the lrc descriptor address of this context has changed. In
2716          * either case the context needs to be deregistered with the GuC before
2717          * registering this context.
2718          */
2719         if (context_registered) {
2720                 bool disabled;
2721                 unsigned long flags;
2722
2723                 trace_intel_context_steal_guc_id(ce);
2724                 GEM_BUG_ON(!loop);
2725
2726                 /* Seal race with Reset */
2727                 spin_lock_irqsave(&ce->guc_state.lock, flags);
2728                 disabled = submission_disabled(guc);
2729                 if (likely(!disabled)) {
2730                         set_context_wait_for_deregister_to_register(ce);
2731                         intel_context_get(ce);
2732                 }
2733                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2734                 if (unlikely(disabled)) {
2735                         clr_ctx_id_mapping(guc, ctx_id);
2736                         return 0;       /* Will get registered later */
2737                 }
2738
2739                 /*
2740                  * If stealing the guc_id, this ce has the same guc_id as the
2741                  * context whose guc_id was stolen.
2742                  */
2743                 with_intel_runtime_pm(runtime_pm, wakeref)
2744                         ret = deregister_context(ce, ce->guc_id.id);
2745                 if (unlikely(ret == -ENODEV))
2746                         ret = 0;        /* Will get registered later */
2747         } else {
2748                 with_intel_runtime_pm(runtime_pm, wakeref)
2749                         ret = register_context(ce, loop);
2750                 if (unlikely(ret == -EBUSY)) {
2751                         clr_ctx_id_mapping(guc, ctx_id);
2752                 } else if (unlikely(ret == -ENODEV)) {
2753                         clr_ctx_id_mapping(guc, ctx_id);
2754                         ret = 0;        /* Will get registered later */
2755                 }
2756         }
2757
2758         return ret;
2759 }
2760
/* Pre-pin step for a GuC context on @engine: delegates to lrc_pre_pin(). */
static int __guc_context_pre_pin(struct intel_context *ce,
				 struct intel_engine_cs *engine,
				 struct i915_gem_ww_ctx *ww,
				 void **vaddr)
{
	int err = lrc_pre_pin(ce, engine, ww, vaddr);

	return err;
}
2768
2769 static int __guc_context_pin(struct intel_context *ce,
2770                              struct intel_engine_cs *engine,
2771                              void *vaddr)
2772 {
2773         if (i915_ggtt_offset(ce->state) !=
2774             (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2775                 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2776
2777         /*
2778          * GuC context gets pinned in guc_request_alloc. See that function for
2779          * explaination of why.
2780          */
2781
2782         return lrc_pin(ce, engine, vaddr);
2783 }
2784
2785 static int guc_context_pre_pin(struct intel_context *ce,
2786                                struct i915_gem_ww_ctx *ww,
2787                                void **vaddr)
2788 {
2789         return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2790 }
2791
2792 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2793 {
2794         int ret = __guc_context_pin(ce, ce->engine, vaddr);
2795
2796         if (likely(!ret && !intel_context_is_barrier(ce)))
2797                 intel_engine_pm_get(ce->engine);
2798
2799         return ret;
2800 }
2801
2802 static void guc_context_unpin(struct intel_context *ce)
2803 {
2804         struct intel_guc *guc = ce_to_guc(ce);
2805
2806         unpin_guc_id(guc, ce);
2807         lrc_unpin(ce);
2808
2809         if (likely(!intel_context_is_barrier(ce)))
2810                 intel_engine_pm_put_async(ce->engine);
2811 }
2812
/* Post-unpin step for a GuC context: nothing GuC specific, defer to LRC. */
static void guc_context_post_unpin(struct intel_context *ce)
{
	lrc_post_unpin(ce);
}
2817
2818 static void __guc_context_sched_enable(struct intel_guc *guc,
2819                                        struct intel_context *ce)
2820 {
2821         u32 action[] = {
2822                 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2823                 ce->guc_id.id,
2824                 GUC_CONTEXT_ENABLE
2825         };
2826
2827         trace_intel_context_sched_enable(ce);
2828
2829         guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2830                                       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2831 }
2832
2833 static void __guc_context_sched_disable(struct intel_guc *guc,
2834                                         struct intel_context *ce,
2835                                         u16 guc_id)
2836 {
2837         u32 action[] = {
2838                 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2839                 guc_id, /* ce->guc_id.id not stable */
2840                 GUC_CONTEXT_DISABLE
2841         };
2842
2843         GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2844
2845         GEM_BUG_ON(intel_context_is_child(ce));
2846         trace_intel_context_sched_disable(ce);
2847
2848         guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2849                                       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2850 }
2851
2852 static void guc_blocked_fence_complete(struct intel_context *ce)
2853 {
2854         lockdep_assert_held(&ce->guc_state.lock);
2855
2856         if (!i915_sw_fence_done(&ce->guc_state.blocked))
2857                 i915_sw_fence_complete(&ce->guc_state.blocked);
2858 }
2859
2860 static void guc_blocked_fence_reinit(struct intel_context *ce)
2861 {
2862         lockdep_assert_held(&ce->guc_state.lock);
2863         GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2864
2865         /*
2866          * This fence is always complete unless a pending schedule disable is
2867          * outstanding. We arm the fence here and complete it when we receive
2868          * the pending schedule disable complete message.
2869          */
2870         i915_sw_fence_fini(&ce->guc_state.blocked);
2871         i915_sw_fence_reinit(&ce->guc_state.blocked);
2872         i915_sw_fence_await(&ce->guc_state.blocked);
2873         i915_sw_fence_commit(&ce->guc_state.blocked);
2874 }
2875
2876 static u16 prep_context_pending_disable(struct intel_context *ce)
2877 {
2878         lockdep_assert_held(&ce->guc_state.lock);
2879
2880         set_context_pending_disable(ce);
2881         clr_context_enabled(ce);
2882         guc_blocked_fence_reinit(ce);
2883         intel_context_get(ce);
2884
2885         return ce->guc_id.id;
2886 }
2887
2888 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2889 {
2890         struct intel_guc *guc = ce_to_guc(ce);
2891         unsigned long flags;
2892         struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2893         intel_wakeref_t wakeref;
2894         u16 guc_id;
2895         bool enabled;
2896
2897         GEM_BUG_ON(intel_context_is_child(ce));
2898
2899         spin_lock_irqsave(&ce->guc_state.lock, flags);
2900
2901         incr_context_blocked(ce);
2902
2903         enabled = context_enabled(ce);
2904         if (unlikely(!enabled || submission_disabled(guc))) {
2905                 if (enabled)
2906                         clr_context_enabled(ce);
2907                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2908                 return &ce->guc_state.blocked;
2909         }
2910
2911         /*
2912          * We add +2 here as the schedule disable complete CTB handler calls
2913          * intel_context_sched_disable_unpin (-2 to pin_count).
2914          */
2915         atomic_add(2, &ce->pin_count);
2916
2917         guc_id = prep_context_pending_disable(ce);
2918
2919         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2920
2921         with_intel_runtime_pm(runtime_pm, wakeref)
2922                 __guc_context_sched_disable(guc, ce, guc_id);
2923
2924         return &ce->guc_state.blocked;
2925 }
2926
2927 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2928         (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2929 #define SCHED_STATE_NO_UNBLOCK \
2930         (SCHED_STATE_MULTI_BLOCKED_MASK | \
2931          SCHED_STATE_PENDING_DISABLE | \
2932          SCHED_STATE_BANNED)
2933
2934 static bool context_cant_unblock(struct intel_context *ce)
2935 {
2936         lockdep_assert_held(&ce->guc_state.lock);
2937
2938         return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2939                 context_guc_id_invalid(ce) ||
2940                 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2941                 !intel_context_is_pinned(ce);
2942 }
2943
2944 static void guc_context_unblock(struct intel_context *ce)
2945 {
2946         struct intel_guc *guc = ce_to_guc(ce);
2947         unsigned long flags;
2948         struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2949         intel_wakeref_t wakeref;
2950         bool enable;
2951
2952         GEM_BUG_ON(context_enabled(ce));
2953         GEM_BUG_ON(intel_context_is_child(ce));
2954
2955         spin_lock_irqsave(&ce->guc_state.lock, flags);
2956
2957         if (unlikely(submission_disabled(guc) ||
2958                      context_cant_unblock(ce))) {
2959                 enable = false;
2960         } else {
2961                 enable = true;
2962                 set_context_pending_enable(ce);
2963                 set_context_enabled(ce);
2964                 intel_context_get(ce);
2965         }
2966
2967         decr_context_blocked(ce);
2968
2969         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2970
2971         if (enable) {
2972                 with_intel_runtime_pm(runtime_pm, wakeref)
2973                         __guc_context_sched_enable(guc, ce);
2974         }
2975 }
2976
2977 static void guc_context_cancel_request(struct intel_context *ce,
2978                                        struct i915_request *rq)
2979 {
2980         struct intel_context *block_context =
2981                 request_to_scheduling_context(rq);
2982
2983         if (i915_sw_fence_signaled(&rq->submit)) {
2984                 struct i915_sw_fence *fence;
2985
2986                 intel_context_get(ce);
2987                 fence = guc_context_block(block_context);
2988                 i915_sw_fence_wait(fence);
2989                 if (!i915_request_completed(rq)) {
2990                         __i915_request_skip(rq);
2991                         guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2992                                         true);
2993                 }
2994
2995                 guc_context_unblock(block_context);
2996                 intel_context_put(ce);
2997         }
2998 }
2999
3000 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
3001                                                  u16 guc_id,
3002                                                  u32 preemption_timeout)
3003 {
3004         if (guc->fw.major_ver_found >= 70) {
3005                 struct context_policy policy;
3006
3007                 __guc_context_policy_start_klv(&policy, guc_id);
3008                 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
3009                 __guc_context_set_context_policies(guc, &policy, true);
3010         } else {
3011                 u32 action[] = {
3012                         INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
3013                         guc_id,
3014                         preemption_timeout
3015                 };
3016
3017                 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3018         }
3019 }
3020
3021 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
3022 {
3023         struct intel_guc *guc = ce_to_guc(ce);
3024         struct intel_runtime_pm *runtime_pm =
3025                 &ce->engine->gt->i915->runtime_pm;
3026         intel_wakeref_t wakeref;
3027         unsigned long flags;
3028
3029         GEM_BUG_ON(intel_context_is_child(ce));
3030
3031         guc_flush_submissions(guc);
3032
3033         spin_lock_irqsave(&ce->guc_state.lock, flags);
3034         set_context_banned(ce);
3035
3036         if (submission_disabled(guc) ||
3037             (!context_enabled(ce) && !context_pending_disable(ce))) {
3038                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3039
3040                 guc_cancel_context_requests(ce);
3041                 intel_engine_signal_breadcrumbs(ce->engine);
3042         } else if (!context_pending_disable(ce)) {
3043                 u16 guc_id;
3044
3045                 /*
3046                  * We add +2 here as the schedule disable complete CTB handler
3047                  * calls intel_context_sched_disable_unpin (-2 to pin_count).
3048                  */
3049                 atomic_add(2, &ce->pin_count);
3050
3051                 guc_id = prep_context_pending_disable(ce);
3052                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3053
3054                 /*
3055                  * In addition to disabling scheduling, set the preemption
3056                  * timeout to the minimum value (1 us) so the banned context
3057                  * gets kicked off the HW ASAP.
3058                  */
3059                 with_intel_runtime_pm(runtime_pm, wakeref) {
3060                         __guc_context_set_preemption_timeout(guc, guc_id, 1);
3061                         __guc_context_sched_disable(guc, ce, guc_id);
3062                 }
3063         } else {
3064                 if (!context_guc_id_invalid(ce))
3065                         with_intel_runtime_pm(runtime_pm, wakeref)
3066                                 __guc_context_set_preemption_timeout(guc,
3067                                                                      ce->guc_id.id,
3068                                                                      1);
3069                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3070         }
3071 }
3072
3073 static void guc_context_sched_disable(struct intel_context *ce)
3074 {
3075         struct intel_guc *guc = ce_to_guc(ce);
3076         unsigned long flags;
3077         struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3078         intel_wakeref_t wakeref;
3079         u16 guc_id;
3080
3081         GEM_BUG_ON(intel_context_is_child(ce));
3082
3083         spin_lock_irqsave(&ce->guc_state.lock, flags);
3084
3085         /*
3086          * We have to check if the context has been disabled by another thread,
3087          * check if submssion has been disabled to seal a race with reset and
3088          * finally check if any more requests have been committed to the
3089          * context ensursing that a request doesn't slip through the
3090          * 'context_pending_disable' fence.
3091          */
3092         if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
3093                      context_has_committed_requests(ce))) {
3094                 clr_context_enabled(ce);
3095                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3096                 goto unpin;
3097         }
3098         guc_id = prep_context_pending_disable(ce);
3099
3100         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3101
3102         with_intel_runtime_pm(runtime_pm, wakeref)
3103                 __guc_context_sched_disable(guc, ce, guc_id);
3104
3105         return;
3106 unpin:
3107         intel_context_sched_disable_unpin(ce);
3108 }
3109
3110 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3111 {
3112         struct intel_guc *guc = ce_to_guc(ce);
3113         struct intel_gt *gt = guc_to_gt(guc);
3114         unsigned long flags;
3115         bool disabled;
3116
3117         GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3118         GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3119         GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3120         GEM_BUG_ON(context_enabled(ce));
3121
3122         /* Seal race with Reset */
3123         spin_lock_irqsave(&ce->guc_state.lock, flags);
3124         disabled = submission_disabled(guc);
3125         if (likely(!disabled)) {
3126                 __intel_gt_pm_get(gt);
3127                 set_context_destroyed(ce);
3128                 clr_context_registered(ce);
3129         }
3130         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3131         if (unlikely(disabled)) {
3132                 release_guc_id(guc, ce);
3133                 __guc_context_destroy(ce);
3134                 return;
3135         }
3136
3137         deregister_context(ce, ce->guc_id.id);
3138 }
3139
3140 static void __guc_context_destroy(struct intel_context *ce)
3141 {
3142         GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3143                    ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3144                    ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3145                    ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3146         GEM_BUG_ON(ce->guc_state.number_committed_requests);
3147
3148         lrc_fini(ce);
3149         intel_context_fini(ce);
3150
3151         if (intel_engine_is_virtual(ce->engine)) {
3152                 struct guc_virtual_engine *ve =
3153                         container_of(ce, typeof(*ve), context);
3154
3155                 if (ve->base.breadcrumbs)
3156                         intel_breadcrumbs_put(ve->base.breadcrumbs);
3157
3158                 kfree(ve);
3159         } else {
3160                 intel_context_free(ce);
3161         }
3162 }
3163
3164 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3165 {
3166         struct intel_context *ce;
3167         unsigned long flags;
3168
3169         GEM_BUG_ON(!submission_disabled(guc) &&
3170                    guc_submission_initialized(guc));
3171
3172         while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3173                 spin_lock_irqsave(&guc->submission_state.lock, flags);
3174                 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3175                                               struct intel_context,
3176                                               destroyed_link);
3177                 if (ce)
3178                         list_del_init(&ce->destroyed_link);
3179                 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3180
3181                 if (!ce)
3182                         break;
3183
3184                 release_guc_id(guc, ce);
3185                 __guc_context_destroy(ce);
3186         }
3187 }
3188
3189 static void deregister_destroyed_contexts(struct intel_guc *guc)
3190 {
3191         struct intel_context *ce;
3192         unsigned long flags;
3193
3194         while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3195                 spin_lock_irqsave(&guc->submission_state.lock, flags);
3196                 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3197                                               struct intel_context,
3198                                               destroyed_link);
3199                 if (ce)
3200                         list_del_init(&ce->destroyed_link);
3201                 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3202
3203                 if (!ce)
3204                         break;
3205
3206                 guc_lrc_desc_unpin(ce);
3207         }
3208 }
3209
3210 static void destroyed_worker_func(struct work_struct *w)
3211 {
3212         struct intel_guc *guc = container_of(w, struct intel_guc,
3213                                              submission_state.destroyed_worker);
3214         struct intel_gt *gt = guc_to_gt(guc);
3215         int tmp;
3216
3217         with_intel_gt_pm(gt, tmp)
3218                 deregister_destroyed_contexts(guc);
3219 }
3220
3221 static void guc_context_destroy(struct kref *kref)
3222 {
3223         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3224         struct intel_guc *guc = ce_to_guc(ce);
3225         unsigned long flags;
3226         bool destroy;
3227
3228         /*
3229          * If the guc_id is invalid this context has been stolen and we can free
3230          * it immediately. Also can be freed immediately if the context is not
3231          * registered with the GuC or the GuC is in the middle of a reset.
3232          */
3233         spin_lock_irqsave(&guc->submission_state.lock, flags);
3234         destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3235                 !ctx_id_mapped(guc, ce->guc_id.id);
3236         if (likely(!destroy)) {
3237                 if (!list_empty(&ce->guc_id.link))
3238                         list_del_init(&ce->guc_id.link);
3239                 list_add_tail(&ce->destroyed_link,
3240                               &guc->submission_state.destroyed_contexts);
3241         } else {
3242                 __release_guc_id(guc, ce);
3243         }
3244         spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3245         if (unlikely(destroy)) {
3246                 __guc_context_destroy(ce);
3247                 return;
3248         }
3249
3250         /*
3251          * We use a worker to issue the H2G to deregister the context as we can
3252          * take the GT PM for the first time which isn't allowed from an atomic
3253          * context.
3254          */
3255         queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3256 }
3257
3258 static int guc_context_alloc(struct intel_context *ce)
3259 {
3260         return lrc_alloc(ce, ce->engine);
3261 }
3262
3263 static void __guc_context_set_prio(struct intel_guc *guc,
3264                                    struct intel_context *ce)
3265 {
3266         if (guc->fw.major_ver_found >= 70) {
3267                 struct context_policy policy;
3268
3269                 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3270                 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3271                 __guc_context_set_context_policies(guc, &policy, true);
3272         } else {
3273                 u32 action[] = {
3274                         INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3275                         ce->guc_id.id,
3276                         ce->guc_state.prio,
3277                 };
3278
3279                 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3280         }
3281 }
3282
3283 static void guc_context_set_prio(struct intel_guc *guc,
3284                                  struct intel_context *ce,
3285                                  u8 prio)
3286 {
3287         GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
3288                    prio > GUC_CLIENT_PRIORITY_NORMAL);
3289         lockdep_assert_held(&ce->guc_state.lock);
3290
3291         if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3292             !context_registered(ce)) {
3293                 ce->guc_state.prio = prio;
3294                 return;
3295         }
3296
3297         ce->guc_state.prio = prio;
3298         __guc_context_set_prio(guc, ce);
3299
3300         trace_intel_context_set_prio(ce);
3301 }
3302
3303 static inline u8 map_i915_prio_to_guc_prio(int prio)
3304 {
3305         if (prio == I915_PRIORITY_NORMAL)
3306                 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3307         else if (prio < I915_PRIORITY_NORMAL)
3308                 return GUC_CLIENT_PRIORITY_NORMAL;
3309         else if (prio < I915_PRIORITY_DISPLAY)
3310                 return GUC_CLIENT_PRIORITY_HIGH;
3311         else
3312                 return GUC_CLIENT_PRIORITY_KMD_HIGH;
3313 }
3314
3315 static inline void add_context_inflight_prio(struct intel_context *ce,
3316                                              u8 guc_prio)
3317 {
3318         lockdep_assert_held(&ce->guc_state.lock);
3319         GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3320
3321         ++ce->guc_state.prio_count[guc_prio];
3322
3323         /* Overflow protection */
3324         GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3325 }
3326
3327 static inline void sub_context_inflight_prio(struct intel_context *ce,
3328                                              u8 guc_prio)
3329 {
3330         lockdep_assert_held(&ce->guc_state.lock);
3331         GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3332
3333         /* Underflow protection */
3334         GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3335
3336         --ce->guc_state.prio_count[guc_prio];
3337 }
3338
3339 static inline void update_context_prio(struct intel_context *ce)
3340 {
3341         struct intel_guc *guc = &ce->engine->gt->uc.guc;
3342         int i;
3343
3344         BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3345         BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3346
3347         lockdep_assert_held(&ce->guc_state.lock);
3348
3349         for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3350                 if (ce->guc_state.prio_count[i]) {
3351                         guc_context_set_prio(guc, ce, i);
3352                         break;
3353                 }
3354         }
3355 }
3356
3357 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3358 {
3359         /* Lower value is higher priority */
3360         return new_guc_prio < old_guc_prio;
3361 }
3362
3363 static void add_to_context(struct i915_request *rq)
3364 {
3365         struct intel_context *ce = request_to_scheduling_context(rq);
3366         u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3367
3368         GEM_BUG_ON(intel_context_is_child(ce));
3369         GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3370
3371         spin_lock(&ce->guc_state.lock);
3372         list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3373
3374         if (rq->guc_prio == GUC_PRIO_INIT) {
3375                 rq->guc_prio = new_guc_prio;
3376                 add_context_inflight_prio(ce, rq->guc_prio);
3377         } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3378                 sub_context_inflight_prio(ce, rq->guc_prio);
3379                 rq->guc_prio = new_guc_prio;
3380                 add_context_inflight_prio(ce, rq->guc_prio);
3381         }
3382         update_context_prio(ce);
3383
3384         spin_unlock(&ce->guc_state.lock);
3385 }
3386
3387 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3388 {
3389         lockdep_assert_held(&ce->guc_state.lock);
3390
3391         if (rq->guc_prio != GUC_PRIO_INIT &&
3392             rq->guc_prio != GUC_PRIO_FINI) {
3393                 sub_context_inflight_prio(ce, rq->guc_prio);
3394                 update_context_prio(ce);
3395         }
3396         rq->guc_prio = GUC_PRIO_FINI;
3397 }
3398
/*
 * Detach @rq from its scheduling context: unlink it from the scheduling list,
 * finalize its priority accounting, drop the committed-request count and the
 * guc_id reference, then notify any execute callbacks.
 */
static void remove_from_context(struct i915_request *rq)
{
        struct intel_context *ce = request_to_scheduling_context(rq);

        GEM_BUG_ON(intel_context_is_child(ce));

        /* Everything up to the unlock below is serialized by guc_state.lock */
        spin_lock_irq(&ce->guc_state.lock);

        list_del_init(&rq->sched.link);
        clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);

        /* Prevent further __await_execution() registering a cb, then flush */
        set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

        guc_prio_fini(rq, ce);

        decr_context_committed_requests(ce);

        spin_unlock_irq(&ce->guc_state.lock);

        /* Done outside the lock: drop the guc_id reference, run callbacks */
        atomic_dec(&ce->guc_id.ref);
        i915_request_notify_execute_cb_imm(rq);
}
3422
3423 static const struct intel_context_ops guc_context_ops = {
3424         .alloc = guc_context_alloc,
3425
3426         .pre_pin = guc_context_pre_pin,
3427         .pin = guc_context_pin,
3428         .unpin = guc_context_unpin,
3429         .post_unpin = guc_context_post_unpin,
3430
3431         .ban = guc_context_ban,
3432
3433         .cancel_request = guc_context_cancel_request,
3434
3435         .enter = intel_context_enter_engine,
3436         .exit = intel_context_exit_engine,
3437
3438         .sched_disable = guc_context_sched_disable,
3439
3440         .reset = lrc_reset,
3441         .destroy = guc_context_destroy,
3442
3443         .create_virtual = guc_create_virtual,
3444         .create_parallel = guc_create_parallel,
3445 };
3446
3447 static void submit_work_cb(struct irq_work *wrk)
3448 {
3449         struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
3450
3451         might_lock(&rq->engine->sched_engine->lock);
3452         i915_sw_fence_complete(&rq->submit);
3453 }
3454
3455 static void __guc_signal_context_fence(struct intel_context *ce)
3456 {
3457         struct i915_request *rq, *rn;
3458
3459         lockdep_assert_held(&ce->guc_state.lock);
3460
3461         if (!list_empty(&ce->guc_state.fences))
3462                 trace_intel_context_fence_release(ce);
3463
3464         /*
3465          * Use an IRQ to ensure locking order of sched_engine->lock ->
3466          * ce->guc_state.lock is preserved.
3467          */
3468         list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3469                                  guc_fence_link) {
3470                 list_del(&rq->guc_fence_link);
3471                 irq_work_queue(&rq->submit_work);
3472         }
3473
3474         INIT_LIST_HEAD(&ce->guc_state.fences);
3475 }
3476
3477 static void guc_signal_context_fence(struct intel_context *ce)
3478 {
3479         unsigned long flags;
3480
3481         GEM_BUG_ON(intel_context_is_child(ce));
3482
3483         spin_lock_irqsave(&ce->guc_state.lock, flags);
3484         clr_context_wait_for_deregister_to_register(ce);
3485         __guc_signal_context_fence(ce);
3486         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3487 }
3488
3489 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
3490 {
3491         return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
3492                 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3493                 !submission_disabled(ce_to_guc(ce));
3494 }
3495
3496 static void guc_context_init(struct intel_context *ce)
3497 {
3498         const struct i915_gem_context *ctx;
3499         int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3500
3501         rcu_read_lock();
3502         ctx = rcu_dereference(ce->gem_context);
3503         if (ctx)
3504                 prio = ctx->sched.priority;
3505         rcu_read_unlock();
3506
3507         ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3508         set_bit(CONTEXT_GUC_INIT, &ce->flags);
3509 }
3510
/*
 * Prepare @rq for GuC submission: emit the initial cache/TLB invalidation,
 * make sure the scheduling context holds a guc_id reference and is registered
 * when needed, and park the request on the context's fence list while a
 * deregister or schedule-disable G2H is outstanding.
 *
 * Returns 0 on success, or the negative error from emit_flush(), pin_guc_id()
 * or try_context_registration(). An -EPIPE from registration is not reported:
 * submission is disabled and 0 is returned.
 */
static int guc_request_alloc(struct i915_request *rq)
{
        struct intel_context *ce = request_to_scheduling_context(rq);
        struct intel_guc *guc = ce_to_guc(ce);
        unsigned long flags;
        int ret;

        GEM_BUG_ON(!intel_context_is_pinned(rq->context));

        /*
         * Flush enough space to reduce the likelihood of waiting after
         * we start building the request - in which case we will just
         * have to repeat work.
         */
        rq->reserved_space += GUC_REQUEST_SIZE;

        /*
         * Note that after this point, we have committed to using
         * this request as it is being used to both track the
         * state of engine initialisation and liveness of the
         * golden renderstate above. Think twice before you try
         * to cancel/unwind this request now.
         */

        /* Unconditionally invalidate GPU caches and TLBs. */
        ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
        if (ret)
                return ret;

        rq->reserved_space -= GUC_REQUEST_SIZE;

        if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
                guc_context_init(ce);

        /*
         * Call pin_guc_id here rather than in the pinning step as with
         * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the
         * guc_id and creating horrible race conditions. This is especially bad
         * when guc_id are being stolen due to over subscription. By the time
         * this function is reached, it is guaranteed that the guc_id will be
         * persistent until the generated request is retired. Thus, sealing these
         * race conditions. It is still safe to fail here if guc_id are
         * exhausted and return -EAGAIN to the user indicating that they can try
         * again in the future.
         *
         * There is no need for a lock here as the timeline mutex ensures at
         * most one context can be executing this code path at once. The
         * guc_id_ref is incremented once for every request in flight and
         * decremented on each retire. When it is zero, a lock around the
         * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
         */
        /* Fast path: a reference is already held, just take another one */
        if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
                goto out;

        ret = pin_guc_id(guc, ce);      /* returns 1 if new guc_id assigned */
        if (unlikely(ret < 0))
                return ret;
        if (context_needs_register(ce, !!ret)) {
                ret = try_context_registration(ce, true);
                if (unlikely(ret)) {    /* unwind */
                        if (ret == -EPIPE) {
                                disable_submission(guc);
                                goto out;       /* GPU will be reset */
                        }
                        /* Undo the reference and the pin taken by pin_guc_id() */
                        atomic_dec(&ce->guc_id.ref);
                        unpin_guc_id(guc, ce);
                        return ret;
                }
        }

        clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);

out:
        /*
         * We block all requests on this context if a G2H is pending for a
         * schedule disable or context deregistration as the GuC will fail a
         * schedule enable or context registration if either G2H is pending
         * respectively. Once a G2H returns, the fence is released that is
         * blocking these requests (see guc_signal_context_fence).
         */
        spin_lock_irqsave(&ce->guc_state.lock, flags);
        if (context_wait_for_deregister_to_register(ce) ||
            context_pending_disable(ce)) {
                init_irq_work(&rq->submit_work, submit_work_cb);
                i915_sw_fence_await(&rq->submit);

                list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
        }
        incr_context_committed_requests(ce);
        spin_unlock_irqrestore(&ce->guc_state.lock, flags);

        return 0;
}
3604
3605 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3606                                        struct i915_gem_ww_ctx *ww,
3607                                        void **vaddr)
3608 {
3609         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3610
3611         return __guc_context_pre_pin(ce, engine, ww, vaddr);
3612 }
3613
3614 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3615 {
3616         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3617         int ret = __guc_context_pin(ce, engine, vaddr);
3618         intel_engine_mask_t tmp, mask = ce->engine->mask;
3619
3620         if (likely(!ret))
3621                 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3622                         intel_engine_pm_get(engine);
3623
3624         return ret;
3625 }
3626
3627 static void guc_virtual_context_unpin(struct intel_context *ce)
3628 {
3629         intel_engine_mask_t tmp, mask = ce->engine->mask;
3630         struct intel_engine_cs *engine;
3631         struct intel_guc *guc = ce_to_guc(ce);
3632
3633         GEM_BUG_ON(context_enabled(ce));
3634         GEM_BUG_ON(intel_context_is_barrier(ce));
3635
3636         unpin_guc_id(guc, ce);
3637         lrc_unpin(ce);
3638
3639         for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3640                 intel_engine_pm_put_async(engine);
3641 }
3642
3643 static void guc_virtual_context_enter(struct intel_context *ce)
3644 {
3645         intel_engine_mask_t tmp, mask = ce->engine->mask;
3646         struct intel_engine_cs *engine;
3647
3648         for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3649                 intel_engine_pm_get(engine);
3650
3651         intel_timeline_enter(ce->timeline);
3652 }
3653
3654 static void guc_virtual_context_exit(struct intel_context *ce)
3655 {
3656         intel_engine_mask_t tmp, mask = ce->engine->mask;
3657         struct intel_engine_cs *engine;
3658
3659         for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3660                 intel_engine_pm_put(engine);
3661
3662         intel_timeline_exit(ce->timeline);
3663 }
3664
3665 static int guc_virtual_context_alloc(struct intel_context *ce)
3666 {
3667         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3668
3669         return lrc_alloc(ce, engine);
3670 }
3671
3672 static const struct intel_context_ops virtual_guc_context_ops = {
3673         .alloc = guc_virtual_context_alloc,
3674
3675         .pre_pin = guc_virtual_context_pre_pin,
3676         .pin = guc_virtual_context_pin,
3677         .unpin = guc_virtual_context_unpin,
3678         .post_unpin = guc_context_post_unpin,
3679
3680         .ban = guc_context_ban,
3681
3682         .cancel_request = guc_context_cancel_request,
3683
3684         .enter = guc_virtual_context_enter,
3685         .exit = guc_virtual_context_exit,
3686
3687         .sched_disable = guc_context_sched_disable,
3688
3689         .destroy = guc_context_destroy,
3690
3691         .get_sibling = guc_virtual_get_sibling,
3692 };
3693
3694 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3695 {
3696         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3697         struct intel_guc *guc = ce_to_guc(ce);
3698         int ret;
3699
3700         GEM_BUG_ON(!intel_context_is_parent(ce));
3701         GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3702
3703         ret = pin_guc_id(guc, ce);
3704         if (unlikely(ret < 0))
3705                 return ret;
3706
3707         return __guc_context_pin(ce, engine, vaddr);
3708 }
3709
3710 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3711 {
3712         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3713
3714         GEM_BUG_ON(!intel_context_is_child(ce));
3715         GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3716
3717         __intel_context_pin(ce->parallel.parent);
3718         return __guc_context_pin(ce, engine, vaddr);
3719 }
3720
3721 static void guc_parent_context_unpin(struct intel_context *ce)
3722 {
3723         struct intel_guc *guc = ce_to_guc(ce);
3724
3725         GEM_BUG_ON(context_enabled(ce));
3726         GEM_BUG_ON(intel_context_is_barrier(ce));
3727         GEM_BUG_ON(!intel_context_is_parent(ce));
3728         GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3729
3730         unpin_guc_id(guc, ce);
3731         lrc_unpin(ce);
3732 }
3733
/*
 * Unpin hook for a child of a parallel context. Only the LRC pin is dropped
 * here; unlike the parent variant, no guc_id is unpinned.
 */
static void guc_child_context_unpin(struct intel_context *ce)
{
        GEM_BUG_ON(context_enabled(ce));
        GEM_BUG_ON(intel_context_is_barrier(ce));
        GEM_BUG_ON(!intel_context_is_child(ce));
        GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));

        lrc_unpin(ce);
}
3743
/*
 * Post-unpin hook for a child of a parallel context: finish the LRC unpin and
 * then unpin the parent, balancing the parent pin taken in
 * guc_child_context_pin().
 */
static void guc_child_context_post_unpin(struct intel_context *ce)
{
        GEM_BUG_ON(!intel_context_is_child(ce));
        GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
        GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));

        lrc_post_unpin(ce);
        intel_context_unpin(ce->parallel.parent);
}
3753
3754 static void guc_child_context_destroy(struct kref *kref)
3755 {
3756         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3757
3758         __guc_context_destroy(ce);
3759 }
3760
3761 static const struct intel_context_ops virtual_parent_context_ops = {
3762         .alloc = guc_virtual_context_alloc,
3763
3764         .pre_pin = guc_context_pre_pin,
3765         .pin = guc_parent_context_pin,
3766         .unpin = guc_parent_context_unpin,
3767         .post_unpin = guc_context_post_unpin,
3768
3769         .ban = guc_context_ban,
3770
3771         .cancel_request = guc_context_cancel_request,
3772
3773         .enter = guc_virtual_context_enter,
3774         .exit = guc_virtual_context_exit,
3775
3776         .sched_disable = guc_context_sched_disable,
3777
3778         .destroy = guc_context_destroy,
3779
3780         .get_sibling = guc_virtual_get_sibling,
3781 };
3782
3783 static const struct intel_context_ops virtual_child_context_ops = {
3784         .alloc = guc_virtual_context_alloc,
3785
3786         .pre_pin = guc_context_pre_pin,
3787         .pin = guc_child_context_pin,
3788         .unpin = guc_child_context_unpin,
3789         .post_unpin = guc_child_context_post_unpin,
3790
3791         .cancel_request = guc_context_cancel_request,
3792
3793         .enter = guc_virtual_context_enter,
3794         .exit = guc_virtual_context_exit,
3795
3796         .destroy = guc_child_context_destroy,
3797
3798         .get_sibling = guc_virtual_get_sibling,
3799 };
3800
/*
 * The below override of the breadcrumbs is enabled when the user configures a
 * context for parallel submission (multi-lrc, parent-child).
 *
 * The overridden breadcrumbs implement an algorithm which allows the GuC to
 * safely preempt all the hw contexts configured for parallel submission
 * between each BB. The contract between the i915 and the GuC is that if the
 * parent context can be preempted, all the children can be preempted, and the
 * GuC will always try to preempt the parent before the children. A handshake
 * between the parent / children breadcrumbs ensures the i915 holds up its end
 * of the deal, creating a window to preempt between each set of BBs.
 */
static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                     u64 offset, u32 len,
                                                     const unsigned int flags);
static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
                                                    u64 offset, u32 len,
                                                    const unsigned int flags);
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                 u32 *cs);
static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
                                                u32 *cs);
3825
3826 static struct intel_context *
3827 guc_create_parallel(struct intel_engine_cs **engines,
3828                     unsigned int num_siblings,
3829                     unsigned int width)
3830 {
3831         struct intel_engine_cs **siblings = NULL;
3832         struct intel_context *parent = NULL, *ce, *err;
3833         int i, j;
3834
3835         siblings = kmalloc_array(num_siblings,
3836                                  sizeof(*siblings),
3837                                  GFP_KERNEL);
3838         if (!siblings)
3839                 return ERR_PTR(-ENOMEM);
3840
3841         for (i = 0; i < width; ++i) {
3842                 for (j = 0; j < num_siblings; ++j)
3843                         siblings[j] = engines[i * num_siblings + j];
3844
3845                 ce = intel_engine_create_virtual(siblings, num_siblings,
3846                                                  FORCE_VIRTUAL);
3847                 if (IS_ERR(ce)) {
3848                         err = ERR_CAST(ce);
3849                         goto unwind;
3850                 }
3851
3852                 if (i == 0) {
3853                         parent = ce;
3854                         parent->ops = &virtual_parent_context_ops;
3855                 } else {
3856                         ce->ops = &virtual_child_context_ops;
3857                         intel_context_bind_parent_child(parent, ce);
3858                 }
3859         }
3860
3861         parent->parallel.fence_context = dma_fence_context_alloc(1);
3862
3863         parent->engine->emit_bb_start =
3864                 emit_bb_start_parent_no_preempt_mid_batch;
3865         parent->engine->emit_fini_breadcrumb =
3866                 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3867         parent->engine->emit_fini_breadcrumb_dw =
3868                 12 + 4 * parent->parallel.number_children;
3869         for_each_child(parent, ce) {
3870                 ce->engine->emit_bb_start =
3871                         emit_bb_start_child_no_preempt_mid_batch;
3872                 ce->engine->emit_fini_breadcrumb =
3873                         emit_fini_breadcrumb_child_no_preempt_mid_batch;
3874                 ce->engine->emit_fini_breadcrumb_dw = 16;
3875         }
3876
3877         kfree(siblings);
3878         return parent;
3879
3880 unwind:
3881         if (parent)
3882                 intel_context_put(parent);
3883         kfree(siblings);
3884         return err;
3885 }
3886
3887 static bool
3888 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3889 {
3890         struct intel_engine_cs *sibling;
3891         intel_engine_mask_t tmp, mask = b->engine_mask;
3892         bool result = false;
3893
3894         for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3895                 result |= intel_engine_irq_enable(sibling);
3896
3897         return result;
3898 }
3899
3900 static void
3901 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3902 {
3903         struct intel_engine_cs *sibling;
3904         intel_engine_mask_t tmp, mask = b->engine_mask;
3905
3906         for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3907                 intel_engine_irq_disable(sibling);
3908 }
3909
3910 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3911 {
3912         int i;
3913
3914         /*
3915          * In GuC submission mode we do not know which physical engine a request
3916          * will be scheduled on, this creates a problem because the breadcrumb
3917          * interrupt is per physical engine. To work around this we attach
3918          * requests and direct all breadcrumb interrupts to the first instance
3919          * of an engine per class. In addition all breadcrumb interrupts are
3920          * enabled / disabled across an engine class in unison.
3921          */
3922         for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
3923                 struct intel_engine_cs *sibling =
3924                         engine->gt->engine_class[engine->class][i];
3925
3926                 if (sibling) {
3927                         if (engine->breadcrumbs != sibling->breadcrumbs) {
3928                                 intel_breadcrumbs_put(engine->breadcrumbs);
3929                                 engine->breadcrumbs =
3930                                         intel_breadcrumbs_get(sibling->breadcrumbs);
3931                         }
3932                         break;
3933                 }
3934         }
3935
3936         if (engine->breadcrumbs) {
3937                 engine->breadcrumbs->engine_mask |= engine->mask;
3938                 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3939                 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3940         }
3941 }
3942
3943 static void guc_bump_inflight_request_prio(struct i915_request *rq,
3944                                            int prio)
3945 {
3946         struct intel_context *ce = request_to_scheduling_context(rq);
3947         u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3948
3949         /* Short circuit function */
3950         if (prio < I915_PRIORITY_NORMAL ||
3951             rq->guc_prio == GUC_PRIO_FINI ||
3952             (rq->guc_prio != GUC_PRIO_INIT &&
3953              !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
3954                 return;
3955
3956         spin_lock(&ce->guc_state.lock);
3957         if (rq->guc_prio != GUC_PRIO_FINI) {
3958                 if (rq->guc_prio != GUC_PRIO_INIT)
3959                         sub_context_inflight_prio(ce, rq->guc_prio);
3960                 rq->guc_prio = new_guc_prio;
3961                 add_context_inflight_prio(ce, rq->guc_prio);
3962                 update_context_prio(ce);
3963         }
3964         spin_unlock(&ce->guc_state.lock);
3965 }
3966
3967 static void guc_retire_inflight_request_prio(struct i915_request *rq)
3968 {
3969         struct intel_context *ce = request_to_scheduling_context(rq);
3970
3971         spin_lock(&ce->guc_state.lock);
3972         guc_prio_fini(rq, ce);
3973         spin_unlock(&ce->guc_state.lock);
3974 }
3975
3976 static void sanitize_hwsp(struct intel_engine_cs *engine)
3977 {
3978         struct intel_timeline *tl;
3979
3980         list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
3981                 intel_timeline_reset_seqno(tl);
3982 }
3983
3984 static void guc_sanitize(struct intel_engine_cs *engine)
3985 {
3986         /*
3987          * Poison residual state on resume, in case the suspend didn't!
3988          *
3989          * We have to assume that across suspend/resume (or other loss
3990          * of control) that the contents of our pinned buffers has been
3991          * lost, replaced by garbage. Since this doesn't always happen,
3992          * let's poison such state so that we more quickly spot when
3993          * we falsely assume it has been preserved.
3994          */
3995         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3996                 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3997
3998         /*
3999          * The kernel_context HWSP is stored in the status_page. As above,
4000          * that may be lost on resume/initialisation, and so we need to
4001          * reset the value in the HWSP.
4002          */
4003         sanitize_hwsp(engine);
4004
4005         /* And scrub the dirty cachelines for the HWSP */
4006         drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
4007
4008         intel_engine_reset_pinned_contexts(engine);
4009 }
4010
/*
 * Program the engine's hardware status page: set the HWSP write mask to all
 * ones and point RING_HWS_PGA at the GGTT offset of the status page vma.
 */
static void setup_hwsp(struct intel_engine_cs *engine)
{
        intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */

        ENGINE_WRITE_FW(engine,
                        RING_HWS_PGA,
                        i915_ggtt_offset(engine->status_page.vma));
}
4019
4020 static void start_engine(struct intel_engine_cs *engine)
4021 {
4022         ENGINE_WRITE_FW(engine,
4023                         RING_MODE_GEN7,
4024                         _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
4025
4026         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4027         ENGINE_POSTING_READ(engine, RING_MI_MODE);
4028 }
4029
4030 static int guc_resume(struct intel_engine_cs *engine)
4031 {
4032         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4033
4034         intel_mocs_init_engine(engine);
4035
4036         intel_breadcrumbs_reset(engine->breadcrumbs);
4037
4038         setup_hwsp(engine);
4039         start_engine(engine);
4040
4041         if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
4042                 xehp_enable_ccs_engines(engine);
4043
4044         return 0;
4045 }
4046
4047 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
4048 {
4049         return !sched_engine->tasklet.callback;
4050 }
4051
4052 static void guc_set_default_submission(struct intel_engine_cs *engine)
4053 {
4054         engine->submit_request = guc_submit_request;
4055 }
4056
4057 static inline void guc_kernel_context_pin(struct intel_guc *guc,
4058                                           struct intel_context *ce)
4059 {
4060         /*
4061          * Note: we purposefully do not check the returns below because
4062          * the registration can only fail if a reset is just starting.
4063          * This is called at the end of reset so presumably another reset
4064          * isn't happening and even it did this code would be run again.
4065          */
4066
4067         if (context_guc_id_invalid(ce))
4068                 pin_guc_id(guc, ce);
4069
4070         try_context_registration(ce, true);
4071 }
4072
4073 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
4074 {
4075         struct intel_gt *gt = guc_to_gt(guc);
4076         struct intel_engine_cs *engine;
4077         enum intel_engine_id id;
4078
4079         /* make sure all descriptors are clean... */
4080         xa_destroy(&guc->context_lookup);
4081
4082         /*
4083          * Some contexts might have been pinned before we enabled GuC
4084          * submission, so we need to add them to the GuC bookeeping.
4085          * Also, after a reset the of the GuC we want to make sure that the
4086          * information shared with GuC is properly reset. The kernel LRCs are
4087          * not attached to the gem_context, so they need to be added separately.
4088          */
4089         for_each_engine(engine, gt, id) {
4090                 struct intel_context *ce;
4091
4092                 list_for_each_entry(ce, &engine->pinned_contexts_list,
4093                                     pinned_contexts_link)
4094                         guc_kernel_context_pin(guc, ce);
4095         }
4096 }
4097
4098 static void guc_release(struct intel_engine_cs *engine)
4099 {
4100         engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4101
4102         intel_engine_cleanup_common(engine);
4103         lrc_fini_wa_ctx(engine);
4104 }
4105
4106 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4107 {
4108         struct intel_engine_cs *e;
4109         intel_engine_mask_t tmp, mask = engine->mask;
4110
4111         for_each_engine_masked(e, engine->gt, mask, tmp)
4112                 e->serial++;
4113 }
4114
4115 static void guc_default_vfuncs(struct intel_engine_cs *engine)
4116 {
4117         /* Default vfuncs which can be overridden by each engine. */
4118
4119         engine->resume = guc_resume;
4120
4121         engine->cops = &guc_context_ops;
4122         engine->request_alloc = guc_request_alloc;
4123         engine->add_active_request = add_to_context;
4124         engine->remove_active_request = remove_from_context;
4125
4126         engine->sched_engine->schedule = i915_schedule;
4127
4128         engine->reset.prepare = guc_engine_reset_prepare;
4129         engine->reset.rewind = guc_rewind_nop;
4130         engine->reset.cancel = guc_reset_nop;
4131         engine->reset.finish = guc_reset_nop;
4132
4133         engine->emit_flush = gen8_emit_flush_xcs;
4134         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4135         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4136         if (GRAPHICS_VER(engine->i915) >= 12) {
4137                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4138                 engine->emit_flush = gen12_emit_flush_xcs;
4139         }
4140         engine->set_default_submission = guc_set_default_submission;
4141         engine->busyness = guc_engine_busyness;
4142
4143         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4144         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4145         engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4146
4147         /* Wa_14014475959:dg2 */
4148         if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
4149                 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4150
4151         /*
4152          * TODO: GuC supports timeslicing and semaphores as well, but they're
4153          * handled by the firmware so some minor tweaks are required before
4154          * enabling.
4155          *
4156          * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4157          */
4158
4159         engine->emit_bb_start = gen8_emit_bb_start;
4160         if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4161                 engine->emit_bb_start = gen125_emit_bb_start;
4162 }
4163
4164 static void rcs_submission_override(struct intel_engine_cs *engine)
4165 {
4166         switch (GRAPHICS_VER(engine->i915)) {
4167         case 12:
4168                 engine->emit_flush = gen12_emit_flush_rcs;
4169                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4170                 break;
4171         case 11:
4172                 engine->emit_flush = gen11_emit_flush_rcs;
4173                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4174                 break;
4175         default:
4176                 engine->emit_flush = gen8_emit_flush_rcs;
4177                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4178                 break;
4179         }
4180 }
4181
4182 static inline void guc_default_irqs(struct intel_engine_cs *engine)
4183 {
4184         engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
4185         intel_engine_set_irq_handler(engine, cs_irq_handler);
4186 }
4187
4188 static void guc_sched_engine_destroy(struct kref *kref)
4189 {
4190         struct i915_sched_engine *sched_engine =
4191                 container_of(kref, typeof(*sched_engine), ref);
4192         struct intel_guc *guc = sched_engine->private_data;
4193
4194         guc->sched_engine = NULL;
4195         tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4196         kfree(sched_engine);
4197 }
4198
4199 int intel_guc_submission_setup(struct intel_engine_cs *engine)
4200 {
4201         struct drm_i915_private *i915 = engine->i915;
4202         struct intel_guc *guc = &engine->gt->uc.guc;
4203
4204         /*
4205          * The setup relies on several assumptions (e.g. irqs always enabled)
4206          * that are only valid on gen11+
4207          */
4208         GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4209
4210         if (!guc->sched_engine) {
4211                 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4212                 if (!guc->sched_engine)
4213                         return -ENOMEM;
4214
4215                 guc->sched_engine->schedule = i915_schedule;
4216                 guc->sched_engine->disabled = guc_sched_engine_disabled;
4217                 guc->sched_engine->private_data = guc;
4218                 guc->sched_engine->destroy = guc_sched_engine_destroy;
4219                 guc->sched_engine->bump_inflight_request_prio =
4220                         guc_bump_inflight_request_prio;
4221                 guc->sched_engine->retire_inflight_request_prio =
4222                         guc_retire_inflight_request_prio;
4223                 tasklet_setup(&guc->sched_engine->tasklet,
4224                               guc_submission_tasklet);
4225         }
4226         i915_sched_engine_put(engine->sched_engine);
4227         engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4228
4229         guc_default_vfuncs(engine);
4230         guc_default_irqs(engine);
4231         guc_init_breadcrumbs(engine);
4232
4233         if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4234                 rcs_submission_override(engine);
4235
4236         lrc_init_wa_ctx(engine);
4237
4238         /* Finally, take ownership and responsibility for cleanup! */
4239         engine->sanitize = guc_sanitize;
4240         engine->release = guc_release;
4241
4242         return 0;
4243 }
4244
/* Enable GuC submission: rebuild the LRC mapping, then set up engine stats. */
void intel_guc_submission_enable(struct intel_guc *guc)
{
	guc_init_lrc_mapping(guc);

	guc_init_engine_stats(guc);
}
4250
/* Counterpart of intel_guc_submission_enable(); currently has nothing to do. */
void intel_guc_submission_disable(struct intel_guc *guc)
{
	/* Note: the GuC may already have been reset by the time we get here */
}
4255
4256 static bool __guc_submission_supported(struct intel_guc *guc)
4257 {
4258         /* GuC submission is unavailable for pre-Gen11 */
4259         return intel_guc_is_supported(guc) &&
4260                GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4261 }
4262
4263 static bool __guc_submission_selected(struct intel_guc *guc)
4264 {
4265         struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4266
4267         if (!intel_guc_submission_is_supported(guc))
4268                 return false;
4269
4270         return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4271 }
4272
4273 void intel_guc_submission_init_early(struct intel_guc *guc)
4274 {
4275         xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4276
4277         spin_lock_init(&guc->submission_state.lock);
4278         INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4279         ida_init(&guc->submission_state.guc_ids);
4280         INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4281         INIT_WORK(&guc->submission_state.destroyed_worker,
4282                   destroyed_worker_func);
4283         INIT_WORK(&guc->submission_state.reset_fail_worker,
4284                   reset_fail_worker_func);
4285
4286         spin_lock_init(&guc->timestamp.lock);
4287         INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4288
4289         guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4290         guc->submission_supported = __guc_submission_supported(guc);
4291         guc->submission_selected = __guc_submission_selected(guc);
4292 }
4293
4294 static inline struct intel_context *
4295 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4296 {
4297         struct intel_context *ce;
4298
4299         if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4300                 drm_err(&guc_to_gt(guc)->i915->drm,
4301                         "Invalid ctx_id %u\n", ctx_id);
4302                 return NULL;
4303         }
4304
4305         ce = __get_context(guc, ctx_id);
4306         if (unlikely(!ce)) {
4307                 drm_err(&guc_to_gt(guc)->i915->drm,
4308                         "Context is NULL, ctx_id %u\n", ctx_id);
4309                 return NULL;
4310         }
4311
4312         if (unlikely(intel_context_is_child(ce))) {
4313                 drm_err(&guc_to_gt(guc)->i915->drm,
4314                         "Context is child, ctx_id %u\n", ctx_id);
4315                 return NULL;
4316         }
4317
4318         return ce;
4319 }
4320
4321 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4322                                           const u32 *msg,
4323                                           u32 len)
4324 {
4325         struct intel_context *ce;
4326         u32 ctx_id;
4327
4328         if (unlikely(len < 1)) {
4329                 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4330                 return -EPROTO;
4331         }
4332         ctx_id = msg[0];
4333
4334         ce = g2h_context_lookup(guc, ctx_id);
4335         if (unlikely(!ce))
4336                 return -EPROTO;
4337
4338         trace_intel_context_deregister_done(ce);
4339
4340 #ifdef CONFIG_DRM_I915_SELFTEST
4341         if (unlikely(ce->drop_deregister)) {
4342                 ce->drop_deregister = false;
4343                 return 0;
4344         }
4345 #endif
4346
4347         if (context_wait_for_deregister_to_register(ce)) {
4348                 struct intel_runtime_pm *runtime_pm =
4349                         &ce->engine->gt->i915->runtime_pm;
4350                 intel_wakeref_t wakeref;
4351
4352                 /*
4353                  * Previous owner of this guc_id has been deregistered, now safe
4354                  * register this context.
4355                  */
4356                 with_intel_runtime_pm(runtime_pm, wakeref)
4357                         register_context(ce, true);
4358                 guc_signal_context_fence(ce);
4359                 intel_context_put(ce);
4360         } else if (context_destroyed(ce)) {
4361                 /* Context has been destroyed */
4362                 intel_gt_pm_put_async(guc_to_gt(guc));
4363                 release_guc_id(guc, ce);
4364                 __guc_context_destroy(ce);
4365         }
4366
4367         decr_outstanding_submission_g2h(guc);
4368
4369         return 0;
4370 }
4371
4372 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
4373                                      const u32 *msg,
4374                                      u32 len)
4375 {
4376         struct intel_context *ce;
4377         unsigned long flags;
4378         u32 ctx_id;
4379
4380         if (unlikely(len < 2)) {
4381                 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4382                 return -EPROTO;
4383         }
4384         ctx_id = msg[0];
4385
4386         ce = g2h_context_lookup(guc, ctx_id);
4387         if (unlikely(!ce))
4388                 return -EPROTO;
4389
4390         if (unlikely(context_destroyed(ce) ||
4391                      (!context_pending_enable(ce) &&
4392                      !context_pending_disable(ce)))) {
4393                 drm_err(&guc_to_gt(guc)->i915->drm,
4394                         "Bad context sched_state 0x%x, ctx_id %u\n",
4395                         ce->guc_state.sched_state, ctx_id);
4396                 return -EPROTO;
4397         }
4398
4399         trace_intel_context_sched_done(ce);
4400
4401         if (context_pending_enable(ce)) {
4402 #ifdef CONFIG_DRM_I915_SELFTEST
4403                 if (unlikely(ce->drop_schedule_enable)) {
4404                         ce->drop_schedule_enable = false;
4405                         return 0;
4406                 }
4407 #endif
4408
4409                 spin_lock_irqsave(&ce->guc_state.lock, flags);
4410                 clr_context_pending_enable(ce);
4411                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4412         } else if (context_pending_disable(ce)) {
4413                 bool banned;
4414
4415 #ifdef CONFIG_DRM_I915_SELFTEST
4416                 if (unlikely(ce->drop_schedule_disable)) {
4417                         ce->drop_schedule_disable = false;
4418                         return 0;
4419                 }
4420 #endif
4421
4422                 /*
4423                  * Unpin must be done before __guc_signal_context_fence,
4424                  * otherwise a race exists between the requests getting
4425                  * submitted + retired before this unpin completes resulting in
4426                  * the pin_count going to zero and the context still being
4427                  * enabled.
4428                  */
4429                 intel_context_sched_disable_unpin(ce);
4430
4431                 spin_lock_irqsave(&ce->guc_state.lock, flags);
4432                 banned = context_banned(ce);
4433                 clr_context_banned(ce);
4434                 clr_context_pending_disable(ce);
4435                 __guc_signal_context_fence(ce);
4436                 guc_blocked_fence_complete(ce);
4437                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4438
4439                 if (banned) {
4440                         guc_cancel_context_requests(ce);
4441                         intel_engine_signal_breadcrumbs(ce->engine);
4442                 }
4443         }
4444
4445         decr_outstanding_submission_g2h(guc);
4446         intel_context_put(ce);
4447
4448         return 0;
4449 }
4450
4451 static void capture_error_state(struct intel_guc *guc,
4452                                 struct intel_context *ce)
4453 {
4454         struct intel_gt *gt = guc_to_gt(guc);
4455         struct drm_i915_private *i915 = gt->i915;
4456         struct intel_engine_cs *engine = __context_to_physical_engine(ce);
4457         intel_wakeref_t wakeref;
4458
4459         intel_engine_set_hung_context(engine, ce);
4460         with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4461                 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4462         atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
4463 }
4464
4465 static void guc_context_replay(struct intel_context *ce)
4466 {
4467         struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4468
4469         __guc_reset_context(ce, ce->engine->mask);
4470         tasklet_hi_schedule(&sched_engine->tasklet);
4471 }
4472
4473 static void guc_handle_context_reset(struct intel_guc *guc,
4474                                      struct intel_context *ce)
4475 {
4476         trace_intel_context_reset(ce);
4477
4478         if (likely(!intel_context_is_banned(ce))) {
4479                 capture_error_state(guc, ce);
4480                 guc_context_replay(ce);
4481         } else {
4482                 drm_info(&guc_to_gt(guc)->i915->drm,
4483                          "Ignoring context reset notification of banned context 0x%04X on %s",
4484                          ce->guc_id.id, ce->engine->name);
4485         }
4486 }
4487
4488 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
4489                                         const u32 *msg, u32 len)
4490 {
4491         struct intel_context *ce;
4492         unsigned long flags;
4493         int ctx_id;
4494
4495         if (unlikely(len != 1)) {
4496                 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4497                 return -EPROTO;
4498         }
4499
4500         ctx_id = msg[0];
4501
4502         /*
4503          * The context lookup uses the xarray but lookups only require an RCU lock
4504          * not the full spinlock. So take the lock explicitly and keep it until the
4505          * context has been reference count locked to ensure it can't be destroyed
4506          * asynchronously until the reset is done.
4507          */
4508         xa_lock_irqsave(&guc->context_lookup, flags);
4509         ce = g2h_context_lookup(guc, ctx_id);
4510         if (ce)
4511                 intel_context_get(ce);
4512         xa_unlock_irqrestore(&guc->context_lookup, flags);
4513
4514         if (unlikely(!ce))
4515                 return -EPROTO;
4516
4517         guc_handle_context_reset(guc, ce);
4518         intel_context_put(ce);
4519
4520         return 0;
4521 }
4522
4523 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4524                                         const u32 *msg, u32 len)
4525 {
4526         u32 status;
4527
4528         if (unlikely(len != 1)) {
4529                 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4530                 return -EPROTO;
4531         }
4532
4533         status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4534         if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4535                 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
4536
4537         intel_guc_capture_process(guc);
4538
4539         return 0;
4540 }
4541
4542 struct intel_engine_cs *
4543 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4544 {
4545         struct intel_gt *gt = guc_to_gt(guc);
4546         u8 engine_class = guc_class_to_engine_class(guc_class);
4547
4548         /* Class index is checked in class converter */
4549         GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4550
4551         return gt->engine_class[engine_class][instance];
4552 }
4553
4554 static void reset_fail_worker_func(struct work_struct *w)
4555 {
4556         struct intel_guc *guc = container_of(w, struct intel_guc,
4557                                              submission_state.reset_fail_worker);
4558         struct intel_gt *gt = guc_to_gt(guc);
4559         intel_engine_mask_t reset_fail_mask;
4560         unsigned long flags;
4561
4562         spin_lock_irqsave(&guc->submission_state.lock, flags);
4563         reset_fail_mask = guc->submission_state.reset_fail_mask;
4564         guc->submission_state.reset_fail_mask = 0;
4565         spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4566
4567         if (likely(reset_fail_mask))
4568                 intel_gt_handle_error(gt, reset_fail_mask,
4569                                       I915_ERROR_CAPTURE,
4570                                       "GuC failed to reset engine mask=0x%x\n",
4571                                       reset_fail_mask);
4572 }
4573
4574 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4575                                          const u32 *msg, u32 len)
4576 {
4577         struct intel_engine_cs *engine;
4578         struct intel_gt *gt = guc_to_gt(guc);
4579         u8 guc_class, instance;
4580         u32 reason;
4581         unsigned long flags;
4582
4583         if (unlikely(len != 3)) {
4584                 drm_err(&gt->i915->drm, "Invalid length %u", len);
4585                 return -EPROTO;
4586         }
4587
4588         guc_class = msg[0];
4589         instance = msg[1];
4590         reason = msg[2];
4591
4592         engine = intel_guc_lookup_engine(guc, guc_class, instance);
4593         if (unlikely(!engine)) {
4594                 drm_err(&gt->i915->drm,
4595                         "Invalid engine %d:%d", guc_class, instance);
4596                 return -EPROTO;
4597         }
4598
4599         /*
4600          * This is an unexpected failure of a hardware feature. So, log a real
4601          * error message not just the informational that comes with the reset.
4602          */
4603         drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
4604                 guc_class, instance, engine->name, reason);
4605
4606         spin_lock_irqsave(&guc->submission_state.lock, flags);
4607         guc->submission_state.reset_fail_mask |= engine->mask;
4608         spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4609
4610         /*
4611          * A GT reset flushes this worker queue (G2H handler) so we must use
4612          * another worker to trigger a GT reset.
4613          */
4614         queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
4615
4616         return 0;
4617 }
4618
4619 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4620 {
4621         struct intel_guc *guc = &engine->gt->uc.guc;
4622         struct intel_context *ce;
4623         struct i915_request *rq;
4624         unsigned long index;
4625         unsigned long flags;
4626
4627         /* Reset called during driver load? GuC not yet initialised! */
4628         if (unlikely(!guc_submission_initialized(guc)))
4629                 return;
4630
4631         xa_lock_irqsave(&guc->context_lookup, flags);
4632         xa_for_each(&guc->context_lookup, index, ce) {
4633                 if (!kref_get_unless_zero(&ce->ref))
4634                         continue;
4635
4636                 xa_unlock(&guc->context_lookup);
4637
4638                 if (!intel_context_is_pinned(ce))
4639                         goto next;
4640
4641                 if (intel_engine_is_virtual(ce->engine)) {
4642                         if (!(ce->engine->mask & engine->mask))
4643                                 goto next;
4644                 } else {
4645                         if (ce->engine != engine)
4646                                 goto next;
4647                 }
4648
4649                 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
4650                         if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
4651                                 continue;
4652
4653                         intel_engine_set_hung_context(engine, ce);
4654
4655                         /* Can only cope with one hang at a time... */
4656                         intel_context_put(ce);
4657                         xa_lock(&guc->context_lookup);
4658                         goto done;
4659                 }
4660 next:
4661                 intel_context_put(ce);
4662                 xa_lock(&guc->context_lookup);
4663         }
4664 done:
4665         xa_unlock_irqrestore(&guc->context_lookup, flags);
4666 }
4667
4668 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
4669                                     struct i915_request *hung_rq,
4670                                     struct drm_printer *m)
4671 {
4672         struct intel_guc *guc = &engine->gt->uc.guc;
4673         struct intel_context *ce;
4674         unsigned long index;
4675         unsigned long flags;
4676
4677         /* Reset called during driver load? GuC not yet initialised! */
4678         if (unlikely(!guc_submission_initialized(guc)))
4679                 return;
4680
4681         xa_lock_irqsave(&guc->context_lookup, flags);
4682         xa_for_each(&guc->context_lookup, index, ce) {
4683                 if (!kref_get_unless_zero(&ce->ref))
4684                         continue;
4685
4686                 xa_unlock(&guc->context_lookup);
4687
4688                 if (!intel_context_is_pinned(ce))
4689                         goto next;
4690
4691                 if (intel_engine_is_virtual(ce->engine)) {
4692                         if (!(ce->engine->mask & engine->mask))
4693                                 goto next;
4694                 } else {
4695                         if (ce->engine != engine)
4696                                 goto next;
4697                 }
4698
4699                 spin_lock(&ce->guc_state.lock);
4700                 intel_engine_dump_active_requests(&ce->guc_state.requests,
4701                                                   hung_rq, m);
4702                 spin_unlock(&ce->guc_state.lock);
4703
4704 next:
4705                 intel_context_put(ce);
4706                 xa_lock(&guc->context_lookup);
4707         }
4708         xa_unlock_irqrestore(&guc->context_lookup, flags);
4709 }
4710
4711 void intel_guc_submission_print_info(struct intel_guc *guc,
4712                                      struct drm_printer *p)
4713 {
4714         struct i915_sched_engine *sched_engine = guc->sched_engine;
4715         struct rb_node *rb;
4716         unsigned long flags;
4717
4718         if (!sched_engine)
4719                 return;
4720
4721         drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
4722                    atomic_read(&guc->outstanding_submission_g2h));
4723         drm_printf(p, "GuC tasklet count: %u\n\n",
4724                    atomic_read(&sched_engine->tasklet.count));
4725
4726         spin_lock_irqsave(&sched_engine->lock, flags);
4727         drm_printf(p, "Requests in GuC submit tasklet:\n");
4728         for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
4729                 struct i915_priolist *pl = to_priolist(rb);
4730                 struct i915_request *rq;
4731
4732                 priolist_for_each_request(rq, pl)
4733                         drm_printf(p, "guc_id=%u, seqno=%llu\n",
4734                                    rq->context->guc_id.id,
4735                                    rq->fence.seqno);
4736         }
4737         spin_unlock_irqrestore(&sched_engine->lock, flags);
4738         drm_printf(p, "\n");
4739 }
4740
4741 static inline void guc_log_context_priority(struct drm_printer *p,
4742                                             struct intel_context *ce)
4743 {
4744         int i;
4745
4746         drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
4747         drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
4748         for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
4749              i < GUC_CLIENT_PRIORITY_NUM; ++i) {
4750                 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
4751                            i, ce->guc_state.prio_count[i]);
4752         }
4753         drm_printf(p, "\n");
4754 }
4755
4756 static inline void guc_log_context(struct drm_printer *p,
4757                                    struct intel_context *ce)
4758 {
4759         drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
4760         drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
4761         drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
4762                    ce->ring->head,
4763                    ce->lrc_reg_state[CTX_RING_HEAD]);
4764         drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
4765                    ce->ring->tail,
4766                    ce->lrc_reg_state[CTX_RING_TAIL]);
4767         drm_printf(p, "\t\tContext Pin Count: %u\n",
4768                    atomic_read(&ce->pin_count));
4769         drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
4770                    atomic_read(&ce->guc_id.ref));
4771         drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
4772                    ce->guc_state.sched_state);
4773 }
4774
4775 void intel_guc_submission_print_context_info(struct intel_guc *guc,
4776                                              struct drm_printer *p)
4777 {
4778         struct intel_context *ce;
4779         unsigned long index;
4780         unsigned long flags;
4781
4782         xa_lock_irqsave(&guc->context_lookup, flags);
4783         xa_for_each(&guc->context_lookup, index, ce) {
4784                 GEM_BUG_ON(intel_context_is_child(ce));
4785
4786                 guc_log_context(p, ce);
4787                 guc_log_context_priority(p, ce);
4788
4789                 if (intel_context_is_parent(ce)) {
4790                         struct intel_context *child;
4791
4792                         drm_printf(p, "\t\tNumber children: %u\n",
4793                                    ce->parallel.number_children);
4794
4795                         if (ce->parallel.guc.wq_status) {
4796                                 drm_printf(p, "\t\tWQI Head: %u\n",
4797                                            READ_ONCE(*ce->parallel.guc.wq_head));
4798                                 drm_printf(p, "\t\tWQI Tail: %u\n",
4799                                            READ_ONCE(*ce->parallel.guc.wq_tail));
4800                                 drm_printf(p, "\t\tWQI Status: %u\n\n",
4801                                            READ_ONCE(*ce->parallel.guc.wq_status));
4802                         }
4803
4804                         if (ce->engine->emit_bb_start ==
4805                             emit_bb_start_parent_no_preempt_mid_batch) {
4806                                 u8 i;
4807
4808                                 drm_printf(p, "\t\tChildren Go: %u\n\n",
4809                                            get_children_go_value(ce));
4810                                 for (i = 0; i < ce->parallel.number_children; ++i)
4811                                         drm_printf(p, "\t\tChildren Join: %u\n",
4812                                                    get_children_join_value(ce, i));
4813                         }
4814
4815                         for_each_child(ce, child)
4816                                 guc_log_context(p, child);
4817                 }
4818         }
4819         xa_unlock_irqrestore(&guc->context_lookup, flags);
4820 }
4821
4822 static inline u32 get_children_go_addr(struct intel_context *ce)
4823 {
4824         GEM_BUG_ON(!intel_context_is_parent(ce));
4825
4826         return i915_ggtt_offset(ce->state) +
4827                 __get_parent_scratch_offset(ce) +
4828                 offsetof(struct parent_scratch, go.semaphore);
4829 }
4830
4831 static inline u32 get_children_join_addr(struct intel_context *ce,
4832                                          u8 child_index)
4833 {
4834         GEM_BUG_ON(!intel_context_is_parent(ce));
4835
4836         return i915_ggtt_offset(ce->state) +
4837                 __get_parent_scratch_offset(ce) +
4838                 offsetof(struct parent_scratch, join[child_index].semaphore);
4839 }
4840
4841 #define PARENT_GO_BB                    1
4842 #define PARENT_GO_FINI_BREADCRUMB       0
4843 #define CHILD_GO_BB                     1
4844 #define CHILD_GO_FINI_BREADCRUMB        0
4845 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
4846                                                      u64 offset, u32 len,
4847                                                      const unsigned int flags)
4848 {
4849         struct intel_context *ce = rq->context;
4850         u32 *cs;
4851         u8 i;
4852
4853         GEM_BUG_ON(!intel_context_is_parent(ce));
4854
4855         cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
4856         if (IS_ERR(cs))
4857                 return PTR_ERR(cs);
4858
4859         /* Wait on children */
4860         for (i = 0; i < ce->parallel.number_children; ++i) {
4861                 *cs++ = (MI_SEMAPHORE_WAIT |
4862                          MI_SEMAPHORE_GLOBAL_GTT |
4863                          MI_SEMAPHORE_POLL |
4864                          MI_SEMAPHORE_SAD_EQ_SDD);
4865                 *cs++ = PARENT_GO_BB;
4866                 *cs++ = get_children_join_addr(ce, i);
4867                 *cs++ = 0;
4868         }
4869
4870         /* Turn off preemption */
4871         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4872         *cs++ = MI_NOOP;
4873
4874         /* Tell children go */
4875         cs = gen8_emit_ggtt_write(cs,
4876                                   CHILD_GO_BB,
4877                                   get_children_go_addr(ce),
4878                                   0);
4879
4880         /* Jump to batch */
4881         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4882                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4883         *cs++ = lower_32_bits(offset);
4884         *cs++ = upper_32_bits(offset);
4885         *cs++ = MI_NOOP;
4886
4887         intel_ring_advance(rq, cs);
4888
4889         return 0;
4890 }
4891
4892 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
4893                                                     u64 offset, u32 len,
4894                                                     const unsigned int flags)
4895 {
4896         struct intel_context *ce = rq->context;
4897         struct intel_context *parent = intel_context_to_parent(ce);
4898         u32 *cs;
4899
4900         GEM_BUG_ON(!intel_context_is_child(ce));
4901
4902         cs = intel_ring_begin(rq, 12);
4903         if (IS_ERR(cs))
4904                 return PTR_ERR(cs);
4905
4906         /* Signal parent */
4907         cs = gen8_emit_ggtt_write(cs,
4908                                   PARENT_GO_BB,
4909                                   get_children_join_addr(parent,
4910                                                          ce->parallel.child_index),
4911                                   0);
4912
4913         /* Wait on parent for go */
4914         *cs++ = (MI_SEMAPHORE_WAIT |
4915                  MI_SEMAPHORE_GLOBAL_GTT |
4916                  MI_SEMAPHORE_POLL |
4917                  MI_SEMAPHORE_SAD_EQ_SDD);
4918         *cs++ = CHILD_GO_BB;
4919         *cs++ = get_children_go_addr(parent);
4920         *cs++ = 0;
4921
4922         /* Turn off preemption */
4923         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4924
4925         /* Jump to batch */
4926         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4927                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4928         *cs++ = lower_32_bits(offset);
4929         *cs++ = upper_32_bits(offset);
4930
4931         intel_ring_advance(rq, cs);
4932
4933         return 0;
4934 }
4935
4936 static u32 *
4937 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4938                                                    u32 *cs)
4939 {
4940         struct intel_context *ce = rq->context;
4941         u8 i;
4942
4943         GEM_BUG_ON(!intel_context_is_parent(ce));
4944
4945         /* Wait on children */
4946         for (i = 0; i < ce->parallel.number_children; ++i) {
4947                 *cs++ = (MI_SEMAPHORE_WAIT |
4948                          MI_SEMAPHORE_GLOBAL_GTT |
4949                          MI_SEMAPHORE_POLL |
4950                          MI_SEMAPHORE_SAD_EQ_SDD);
4951                 *cs++ = PARENT_GO_FINI_BREADCRUMB;
4952                 *cs++ = get_children_join_addr(ce, i);
4953                 *cs++ = 0;
4954         }
4955
4956         /* Turn on preemption */
4957         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4958         *cs++ = MI_NOOP;
4959
4960         /* Tell children go */
4961         cs = gen8_emit_ggtt_write(cs,
4962                                   CHILD_GO_FINI_BREADCRUMB,
4963                                   get_children_go_addr(ce),
4964                                   0);
4965
4966         return cs;
4967 }
4968
4969 /*
4970  * If this true, a submission of multi-lrc requests had an error and the
4971  * requests need to be skipped. The front end (execuf IOCTL) should've called
4972  * i915_request_skip which squashes the BB but we still need to emit the fini
4973  * breadrcrumbs seqno write. At this point we don't know how many of the
4974  * requests in the multi-lrc submission were generated so we can't do the
4975  * handshake between the parent and children (e.g. if 4 requests should be
4976  * generated but 2nd hit an error only 1 would be seen by the GuC backend).
4977  * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error
4978  * has occurred on any of the requests in submission / relationship.
4979  */
4980 static inline bool skip_handshake(struct i915_request *rq)
4981 {
4982         return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
4983 }
4984
4985 #define NON_SKIP_LEN    6
4986 static u32 *
4987 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4988                                                  u32 *cs)
4989 {
4990         struct intel_context *ce = rq->context;
4991         __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
4992         __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
4993
4994         GEM_BUG_ON(!intel_context_is_parent(ce));
4995
4996         if (unlikely(skip_handshake(rq))) {
4997                 /*
4998                  * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
4999                  * the NON_SKIP_LEN comes from the length of the emits below.
5000                  */
5001                 memset(cs, 0, sizeof(u32) *
5002                        (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5003                 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5004         } else {
5005                 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
5006         }
5007
5008         /* Emit fini breadcrumb */
5009         before_fini_breadcrumb_user_interrupt_cs = cs;
5010         cs = gen8_emit_ggtt_write(cs,
5011                                   rq->fence.seqno,
5012                                   i915_request_active_timeline(rq)->hwsp_offset,
5013                                   0);
5014
5015         /* User interrupt */
5016         *cs++ = MI_USER_INTERRUPT;
5017         *cs++ = MI_NOOP;
5018
5019         /* Ensure our math for skip + emit is correct */
5020         GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5021                    cs);
5022         GEM_BUG_ON(start_fini_breadcrumb_cs +
5023                    ce->engine->emit_fini_breadcrumb_dw != cs);
5024
5025         rq->tail = intel_ring_offset(rq, cs);
5026
5027         return cs;
5028 }
5029
5030 static u32 *
5031 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5032                                                   u32 *cs)
5033 {
5034         struct intel_context *ce = rq->context;
5035         struct intel_context *parent = intel_context_to_parent(ce);
5036
5037         GEM_BUG_ON(!intel_context_is_child(ce));
5038
5039         /* Turn on preemption */
5040         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5041         *cs++ = MI_NOOP;
5042
5043         /* Signal parent */
5044         cs = gen8_emit_ggtt_write(cs,
5045                                   PARENT_GO_FINI_BREADCRUMB,
5046                                   get_children_join_addr(parent,
5047                                                          ce->parallel.child_index),
5048                                   0);
5049
5050         /* Wait parent on for go */
5051         *cs++ = (MI_SEMAPHORE_WAIT |
5052                  MI_SEMAPHORE_GLOBAL_GTT |
5053                  MI_SEMAPHORE_POLL |
5054                  MI_SEMAPHORE_SAD_EQ_SDD);
5055         *cs++ = CHILD_GO_FINI_BREADCRUMB;
5056         *cs++ = get_children_go_addr(parent);
5057         *cs++ = 0;
5058
5059         return cs;
5060 }
5061
5062 static u32 *
5063 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5064                                                 u32 *cs)
5065 {
5066         struct intel_context *ce = rq->context;
5067         __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5068         __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5069
5070         GEM_BUG_ON(!intel_context_is_child(ce));
5071
5072         if (unlikely(skip_handshake(rq))) {
5073                 /*
5074                  * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
5075                  * the NON_SKIP_LEN comes from the length of the emits below.
5076                  */
5077                 memset(cs, 0, sizeof(u32) *
5078                        (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5079                 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5080         } else {
5081                 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
5082         }
5083
5084         /* Emit fini breadcrumb */
5085         before_fini_breadcrumb_user_interrupt_cs = cs;
5086         cs = gen8_emit_ggtt_write(cs,
5087                                   rq->fence.seqno,
5088                                   i915_request_active_timeline(rq)->hwsp_offset,
5089                                   0);
5090
5091         /* User interrupt */
5092         *cs++ = MI_USER_INTERRUPT;
5093         *cs++ = MI_NOOP;
5094
5095         /* Ensure our math for skip + emit is correct */
5096         GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5097                    cs);
5098         GEM_BUG_ON(start_fini_breadcrumb_cs +
5099                    ce->engine->emit_fini_breadcrumb_dw != cs);
5100
5101         rq->tail = intel_ring_offset(rq, cs);
5102
5103         return cs;
5104 }
5105
5106 #undef NON_SKIP_LEN
5107
5108 static struct intel_context *
5109 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
5110                    unsigned long flags)
5111 {
5112         struct guc_virtual_engine *ve;
5113         struct intel_guc *guc;
5114         unsigned int n;
5115         int err;
5116
5117         ve = kzalloc(sizeof(*ve), GFP_KERNEL);
5118         if (!ve)
5119                 return ERR_PTR(-ENOMEM);
5120
5121         guc = &siblings[0]->gt->uc.guc;
5122
5123         ve->base.i915 = siblings[0]->i915;
5124         ve->base.gt = siblings[0]->gt;
5125         ve->base.uncore = siblings[0]->uncore;
5126         ve->base.id = -1;
5127
5128         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5129         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5130         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5131         ve->base.saturated = ALL_ENGINES;
5132
5133         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5134
5135         ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
5136
5137         ve->base.cops = &virtual_guc_context_ops;
5138         ve->base.request_alloc = guc_request_alloc;
5139         ve->base.bump_serial = virtual_guc_bump_serial;
5140
5141         ve->base.submit_request = guc_submit_request;
5142
5143         ve->base.flags = I915_ENGINE_IS_VIRTUAL;
5144
5145         intel_context_init(&ve->context, &ve->base);
5146
5147         for (n = 0; n < count; n++) {
5148                 struct intel_engine_cs *sibling = siblings[n];
5149
5150                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5151                 if (sibling->mask & ve->base.mask) {
5152                         DRM_DEBUG("duplicate %s entry in load balancer\n",
5153                                   sibling->name);
5154                         err = -EINVAL;
5155                         goto err_put;
5156                 }
5157
5158                 ve->base.mask |= sibling->mask;
5159                 ve->base.logical_mask |= sibling->logical_mask;
5160
5161                 if (n != 0 && ve->base.class != sibling->class) {
5162                         DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5163                                   sibling->class, ve->base.class);
5164                         err = -EINVAL;
5165                         goto err_put;
5166                 } else if (n == 0) {
5167                         ve->base.class = sibling->class;
5168                         ve->base.uabi_class = sibling->uabi_class;
5169                         snprintf(ve->base.name, sizeof(ve->base.name),
5170                                  "v%dx%d", ve->base.class, count);
5171                         ve->base.context_size = sibling->context_size;
5172
5173                         ve->base.add_active_request =
5174                                 sibling->add_active_request;
5175                         ve->base.remove_active_request =
5176                                 sibling->remove_active_request;
5177                         ve->base.emit_bb_start = sibling->emit_bb_start;
5178                         ve->base.emit_flush = sibling->emit_flush;
5179                         ve->base.emit_init_breadcrumb =
5180                                 sibling->emit_init_breadcrumb;
5181                         ve->base.emit_fini_breadcrumb =
5182                                 sibling->emit_fini_breadcrumb;
5183                         ve->base.emit_fini_breadcrumb_dw =
5184                                 sibling->emit_fini_breadcrumb_dw;
5185                         ve->base.breadcrumbs =
5186                                 intel_breadcrumbs_get(sibling->breadcrumbs);
5187
5188                         ve->base.flags |= sibling->flags;
5189
5190                         ve->base.props.timeslice_duration_ms =
5191                                 sibling->props.timeslice_duration_ms;
5192                         ve->base.props.preempt_timeout_ms =
5193                                 sibling->props.preempt_timeout_ms;
5194                 }
5195         }
5196
5197         return &ve->context;
5198
5199 err_put:
5200         intel_context_put(&ve->context);
5201         return ERR_PTR(err);
5202 }
5203
5204 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
5205 {
5206         struct intel_engine_cs *engine;
5207         intel_engine_mask_t tmp, mask = ve->mask;
5208
5209         for_each_engine_masked(engine, ve->gt, mask, tmp)
5210                 if (READ_ONCE(engine->props.heartbeat_interval_ms))
5211                         return true;
5212
5213         return false;
5214 }
5215
5216 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5217 #include "selftest_guc.c"
5218 #include "selftest_guc_multi_lrc.c"
5219 #endif