/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *    Changbin Du <changbin.du@intel.com>
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 */

#include <linux/kthread.h>

#include "i915_drv.h"
#include "gvt.h"

#define RING_CTX_OFF(x) \
	offsetof(struct execlist_ring_context, x)

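/*
 * The ring context stores the four PPGTT root pointers as eight consecutive
 * {UDW, LDW} MMIO pairs starting at pdp3_UDW, highest entry first, while the
 * pdp[] array filled by read_guest_pdps() below is ordered lowest-first;
 * hence the reversed index when the pairs are written back.
 */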
static void set_context_pdp_root_pointer(
		struct execlist_ring_context *ring_context,
		u32 pdp[8])
{
	struct execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW;
	int i;

	for (i = 0; i < 8; i++)
		pdp_pair[i].val = pdp[7 - i];
}

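/*
 * Copy the guest ring context into the shadow context: the trailing context
 * pages are read through hypervisor GPA accesses, selected context registers
 * are taken over from the guest state page, and the guest PDP root pointers
 * are replaced with the shadow page-table roots.
 */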
static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	int ring_id = workload->ring_id;
	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
	struct drm_i915_gem_object *ctx_obj =
		shadow_ctx->engine[ring_id].state->obj;
	struct execlist_ring_context *shadow_ring_context;
	struct page *page;
	void *dst;
	unsigned long context_gpa, context_page_num;
	int i;

	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
			workload->ctx_desc.lrca);

	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;

	context_page_num = context_page_num >> PAGE_SHIFT;

	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
		context_page_num = 19;

	i = 2;

	while (i < context_page_num) {
		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
				(u32)((workload->ctx_desc.lrca + i) <<
				I915_GTT_PAGE_SHIFT));
		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
			gvt_vgpu_err("Invalid guest context descriptor\n");
			return -EFAULT;
		}

		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
		dst = kmap(page);
		intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
				I915_GTT_PAGE_SIZE);
		kunmap(page);
		i++;
	}

	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
	shadow_ring_context = kmap(page);

#define COPY_REG(name) \
	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)

	COPY_REG(ctx_ctrl);
	COPY_REG(ctx_timestamp);

	if (ring_id == RCS) {
		COPY_REG(bb_per_ctx_ptr);
		COPY_REG(rcs_indirect_ctx);
		COPY_REG(rcs_indirect_ctx_offset);
	}
#undef COPY_REG

	set_context_pdp_root_pointer(shadow_ring_context,
			workload->shadow_mm->shadow_page_table);

	intel_gvt_hypervisor_read_gpa(vgpu,
			workload->ring_context_gpa +
			sizeof(*shadow_ring_context),
			(void *)shadow_ring_context +
			sizeof(*shadow_ring_context),
			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));

	kunmap(page);
	return 0;
}

static inline bool is_gvt_request(struct drm_i915_gem_request *req)
{
	return i915_gem_context_force_single_submission(req->ctx);
}

static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	u32 ring_base = dev_priv->engine[ring_id]->mmio_base;
	i915_reg_t reg;

	reg = RING_INSTDONE(ring_base);
	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
	reg = RING_ACTHD(ring_base);
	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
	reg = RING_ACTHD_UDW(ring_base);
	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
}

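/*
 * Context status notifier, called by i915 on context schedule-in/out. It
 * switches engine MMIO state between host and vGPU owners and tracks whether
 * the shadow context is active on hardware; complete_current_workload()
 * waits on shadow_ctx_status_wq before touching the guest context.
 */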
static int shadow_context_status_change(struct notifier_block *nb,
		unsigned long action, void *data)
{
	struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
	struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
				shadow_ctx_notifier_block[req->engine->id]);
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	enum intel_engine_id ring_id = req->engine->id;
	struct intel_vgpu_workload *workload;
	unsigned long flags;

	if (!is_gvt_request(req)) {
		spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
		if (action == INTEL_CONTEXT_SCHEDULE_IN &&
		    scheduler->engine_owner[ring_id]) {
			/* Switch ring from vGPU to host. */
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      NULL, ring_id);
			scheduler->engine_owner[ring_id] = NULL;
		}
		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);

		return NOTIFY_OK;
	}

	workload = scheduler->current_workload[ring_id];
	if (unlikely(!workload))
		return NOTIFY_OK;

	switch (action) {
	case INTEL_CONTEXT_SCHEDULE_IN:
		spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
		if (workload->vgpu != scheduler->engine_owner[ring_id]) {
			/* Switch ring from host to vGPU or vGPU to vGPU. */
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      workload->vgpu, ring_id);
			scheduler->engine_owner[ring_id] = workload->vgpu;
		} else
			gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
				      ring_id, workload->vgpu->id);
		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
		atomic_set(&workload->shadow_ctx_active, 1);
		break;
	case INTEL_CONTEXT_SCHEDULE_OUT:
	case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
		save_ring_hw_state(workload->vgpu, ring_id);
		atomic_set(&workload->shadow_ctx_active, 0);
		break;
	default:
		WARN_ON(1);
		return NOTIFY_OK;
	}
	wake_up(&workload->shadow_ctx_status_wq);
	return NOTIFY_OK;
}

static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
		struct intel_engine_cs *engine)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	u64 desc = ce->lrc_desc;

	/* Update bits 0-11 of the context descriptor, which includes flags
	 * like GEN8_CTX_* cached in desc_template.
	 */
	desc &= U64_MAX << 12;
	desc |= ctx->desc_template & ((1ULL << 12) - 1);

	ce->lrc_desc = desc;
}

static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	void *shadow_ring_buffer_va;
	u32 *cs;

	/* allocate shadow ring buffer */
	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
	if (IS_ERR(cs)) {
		gvt_vgpu_err("fail to alloc size=%ld shadow ring buffer\n",
				workload->rb_len);
		return PTR_ERR(cs);
	}

	shadow_ring_buffer_va = workload->shadow_ring_buffer_va;

	/* get shadow ring buffer va */
	workload->shadow_ring_buffer_va = cs;

	memcpy(cs, shadow_ring_buffer_va,
			workload->rb_len);

	cs += workload->rb_len / sizeof(u32);
	intel_ring_advance(workload->req, cs);

	return 0;
}

void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	if (!wa_ctx->indirect_ctx.obj)
		return;

	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
}

/**
 * intel_gvt_scan_and_shadow_workload - audit a workload by scanning and
 * shadowing it, including the ring buffer, wa_ctx and context.
 * @workload: an abstract entity for each execlist submission.
 *
 * This function is called before the workload is submitted to i915, to make
 * sure the content of the workload is valid.
 */
int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	int ring_id = workload->ring_id;
	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
	struct intel_ring *ring;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	if (workload->shadowed)
		return 0;

	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
				     GEN8_CTX_ADDRESSING_MODE_SHIFT;

	if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated))
		shadow_context_descriptor_update(shadow_ctx,
					dev_priv->engine[ring_id]);

	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
	if (ret)
		goto err_scan;

	if ((workload->ring_id == RCS) &&
	    (workload->wa_ctx.indirect_ctx.size != 0)) {
		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
		if (ret)
			goto err_scan;
	}

	/* Pin the shadow context by GVT even though the shadow context will
	 * be pinned when i915 allocates a request. That is because GVT will
	 * update the guest context from the shadow context when a workload
	 * is completed, and at that moment i915 may already have unpinned
	 * the shadow context, making its pages invalid. So GVT needs a pin
	 * of its own. After updating the guest context, GVT can unpin the
	 * shadow_ctx safely.
	 */
	ring = engine->context_pin(engine, shadow_ctx);
	if (IS_ERR(ring)) {
		ret = PTR_ERR(ring);
		gvt_vgpu_err("fail to pin shadow context\n");
		goto err_shadow;
	}

	ret = populate_shadow_context(workload);
	if (ret)
		goto err_unpin;
	workload->shadowed = true;
	return 0;

err_unpin:
	engine->context_unpin(engine, shadow_ctx);
err_shadow:
	release_shadow_wa_ctx(&workload->wa_ctx);
err_scan:
	return ret;
}

static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
{
	int ring_id = workload->ring_id;
	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
	struct drm_i915_gem_request *rq;
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	int ret;

	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
	if (IS_ERR(rq)) {
		gvt_vgpu_err("fail to allocate gem request\n");
		ret = PTR_ERR(rq);
		goto err_unpin;
	}

	gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);

	workload->req = i915_gem_request_get(rq);
	ret = copy_workload_to_ring_buffer(workload);
	if (ret)
		goto err_unpin;
	return 0;

err_unpin:
	engine->context_unpin(engine, shadow_ctx);
	release_shadow_wa_ctx(&workload->wa_ctx);
	return ret;
}

static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload);

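/*
 * Pin every shadow batch buffer into the global GTT and patch the shadowed
 * MI_BATCH_BUFFER_START command (bb_start_cmd_va) with the new GGTT offset,
 * so the submitted ring refers to the shadow copy rather than guest memory.
 */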
static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_gvt *gvt = workload->vgpu->gvt;
	const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
	struct intel_vgpu_shadow_bb *bb;
	int ret;

	list_for_each_entry(bb, &workload->shadow_bb, list) {
		bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0);
		if (IS_ERR(bb->vma)) {
			ret = PTR_ERR(bb->vma);
			goto err;
		}

		/* relocate shadow batch buffer */
		bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
		if (gmadr_bytes == 8)
			bb->bb_start_cmd_va[2] = 0;

		/* No one is going to touch shadow bb from now on. */
		if (bb->clflush & CLFLUSH_AFTER) {
			drm_clflush_virt_range(bb->va, bb->obj->base.size);
			bb->clflush &= ~CLFLUSH_AFTER;
		}

		ret = i915_gem_object_set_to_gtt_domain(bb->obj, false);
		if (ret)
			goto err;

		i915_gem_obj_finish_shmem_access(bb->obj);
		bb->accessing = false;

		i915_vma_move_to_active(bb->vma, workload->req, 0);
	}
	return 0;
err:
	release_shadow_batch_buffer(workload);
	return ret;
}

static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	struct intel_vgpu_workload *workload = container_of(wa_ctx,
					struct intel_vgpu_workload,
					wa_ctx);
	int ring_id = workload->ring_id;
	struct intel_vgpu_submission *s = &workload->vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	struct drm_i915_gem_object *ctx_obj =
		shadow_ctx->engine[ring_id].state->obj;
	struct execlist_ring_context *shadow_ring_context;
	struct page *page;

	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
	shadow_ring_context = kmap_atomic(page);

	shadow_ring_context->bb_per_ctx_ptr.val =
		(shadow_ring_context->bb_per_ctx_ptr.val &
		(~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
	shadow_ring_context->rcs_indirect_ctx.val =
		(shadow_ring_context->rcs_indirect_ctx.val &
		(~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;

	kunmap_atomic(shadow_ring_context);
	return 0;
}

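/*
 * Pin the combined indirect-context/per-context workaround buffer into the
 * GGTT, compute the shadow addresses of both batches, and write them into
 * the shadow ring context via update_wa_ctx_2_shadow_ctx() above.
 */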
static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	struct i915_vma *vma;
	unsigned char *per_ctx_va =
		(unsigned char *)wa_ctx->indirect_ctx.shadow_va +
		wa_ctx->indirect_ctx.size;

	if (wa_ctx->indirect_ctx.size == 0)
		return 0;

	vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
				       0, CACHELINE_BYTES, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* FIXME: we are not tracking our pinned VMA, leaving it
	 * up to the core to fix up the stray pin_count upon
	 * free.
	 */

	wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);

	wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
	memset(per_ctx_va, 0, CACHELINE_BYTES);

	update_wa_ctx_2_shadow_ctx(wa_ctx);
	return 0;
}

static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_vgpu_shadow_bb *bb, *pos;

	if (list_empty(&workload->shadow_bb))
		return;

	bb = list_first_entry(&workload->shadow_bb,
			struct intel_vgpu_shadow_bb, list);

	mutex_lock(&dev_priv->drm.struct_mutex);

	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
		if (bb->obj) {
			if (bb->accessing)
				i915_gem_obj_finish_shmem_access(bb->obj);

			if (bb->va && !IS_ERR(bb->va))
				i915_gem_object_unpin_map(bb->obj);

			if (bb->vma && !IS_ERR(bb->vma)) {
				i915_vma_unpin(bb->vma);
				i915_vma_close(bb->vma);
			}
			__i915_gem_object_release_unless_active(bb->obj);
		}
		list_del(&bb->list);
		kfree(bb);
	}

	mutex_unlock(&dev_priv->drm.struct_mutex);
}

static int prepare_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	int ret = 0;

	ret = intel_vgpu_pin_mm(workload->shadow_mm);
	if (ret) {
		gvt_vgpu_err("fail to vgpu pin mm\n");
		return ret;
	}

	ret = intel_vgpu_sync_oos_pages(workload->vgpu);
	if (ret) {
		gvt_vgpu_err("fail to vgpu sync oos pages\n");
		goto err_unpin_mm;
	}

	ret = intel_vgpu_flush_post_shadow(workload->vgpu);
	if (ret) {
		gvt_vgpu_err("fail to flush post shadow\n");
		goto err_unpin_mm;
	}

	ret = intel_gvt_generate_request(workload);
	if (ret) {
		gvt_vgpu_err("fail to generate request\n");
		goto err_unpin_mm;
	}

	ret = prepare_shadow_batch_buffer(workload);
	if (ret) {
		gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n");
		goto err_unpin_mm;
	}

	ret = prepare_shadow_wa_ctx(&workload->wa_ctx);
	if (ret) {
		gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n");
		goto err_shadow_batch;
	}

	if (workload->prepare) {
		ret = workload->prepare(workload);
		if (ret)
			goto err_shadow_wa_ctx;
	}

	return 0;
err_shadow_wa_ctx:
	release_shadow_wa_ctx(&workload->wa_ctx);
err_shadow_batch:
	release_shadow_batch_buffer(workload);
err_unpin_mm:
	intel_vgpu_unpin_mm(workload->shadow_mm);
	return ret;
}

static int dispatch_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	int ring_id = workload->ring_id;
	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
	int ret = 0;

	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
		ring_id, workload);

	mutex_lock(&dev_priv->drm.struct_mutex);

	ret = intel_gvt_scan_and_shadow_workload(workload);
	if (ret)
		goto out;

	ret = prepare_workload(workload);
	if (ret) {
		engine->context_unpin(engine, shadow_ctx);
		goto out;
	}

out:
	if (ret)
		workload->status = ret;

	if (!IS_ERR_OR_NULL(workload->req)) {
		gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
				ring_id, workload->req);
		i915_add_request(workload->req);
		workload->dispatched = true;
	}

	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}

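/*
 * Pick the next workload for a ring under gvt->lock. Returns NULL when there
 * is no current vGPU, a reschedule is pending, or the queue is empty; returns
 * the still-current workload again if a previous dispatch failed, so that it
 * can be resubmitted.
 */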
static struct intel_vgpu_workload *pick_next_workload(
		struct intel_gvt *gvt, int ring_id)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_vgpu_workload *workload = NULL;

	mutex_lock(&gvt->lock);

	/*
	 * no current vgpu / will be scheduled out / no workload
	 * bail out
	 */
	if (!scheduler->current_vgpu) {
		gvt_dbg_sched("ring id %d stop - no current vgpu\n", ring_id);
		goto out;
	}

	if (scheduler->need_reschedule) {
		gvt_dbg_sched("ring id %d stop - will reschedule\n", ring_id);
		goto out;
	}

	if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
		goto out;

	/*
	 * still have current workload, maybe the workload dispatcher
	 * failed to submit it for some reason, resubmit it.
	 */
	if (scheduler->current_workload[ring_id]) {
		workload = scheduler->current_workload[ring_id];
		gvt_dbg_sched("ring id %d still have current workload %p\n",
				ring_id, workload);
		goto out;
	}

	/*
	 * pick a workload as current workload
	 * once current workload is set, schedule policy routines
	 * will wait until the current workload is finished when trying to
	 * schedule out a vgpu.
	 */
	scheduler->current_workload[ring_id] = container_of(
			workload_q_head(scheduler->current_vgpu, ring_id)->next,
			struct intel_vgpu_workload, list);

	workload = scheduler->current_workload[ring_id];

	gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload);

	atomic_inc(&workload->vgpu->submission.running_workload_num);
out:
	mutex_unlock(&gvt->lock);
	return workload;
}

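/*
 * Mirror of populate_shadow_context(): after the request completes, write
 * the shadow ring-context pages and selected registers back to the guest
 * context through hypervisor GPA writes, reporting the submitted tail as
 * the new guest ring header.
 */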
static void update_guest_context(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	int ring_id = workload->ring_id;
	struct drm_i915_gem_object *ctx_obj =
		shadow_ctx->engine[ring_id].state->obj;
	struct execlist_ring_context *shadow_ring_context;
	struct page *page;
	void *src;
	unsigned long context_gpa, context_page_num;
	int i;

	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
			workload->ctx_desc.lrca);

	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;

	context_page_num = context_page_num >> PAGE_SHIFT;

	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
		context_page_num = 19;

	i = 2;

	while (i < context_page_num) {
		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
				(u32)((workload->ctx_desc.lrca + i) <<
				I915_GTT_PAGE_SHIFT));
		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
			gvt_vgpu_err("invalid guest context descriptor\n");
			return;
		}

		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
		src = kmap(page);
		intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
				I915_GTT_PAGE_SIZE);
		kunmap(page);
		i++;
	}

	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
		RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);

	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
	shadow_ring_context = kmap(page);

#define COPY_REG(name) \
	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
		RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)

	COPY_REG(ctx_ctrl);
	COPY_REG(ctx_timestamp);

#undef COPY_REG

	intel_gvt_hypervisor_write_gpa(vgpu,
			workload->ring_context_gpa +
			sizeof(*shadow_ring_context),
			(void *)shadow_ring_context +
			sizeof(*shadow_ring_context),
			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));

	kunmap(page);
}

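/*
 * Drop all not-yet-dispatched workloads queued for the engines in
 * engine_mask, and force a context-descriptor refresh on the next
 * submission by clearing shadow_ctx_desc_updated for those engines.
 */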
static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	struct intel_vgpu_workload *pos, *n;
	unsigned int tmp;

	/* free the unsubmitted workloads in the queues. */
	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
		list_for_each_entry_safe(pos, n,
			&s->workload_q_head[engine->id], list) {
			list_del_init(&pos->list);
			intel_vgpu_destroy_workload(pos);
		}
		clear_bit(engine->id, s->shadow_ctx_desc_updated);
	}
}

static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_vgpu_workload *workload =
		scheduler->current_workload[ring_id];
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	int event;

	mutex_lock(&gvt->lock);

	/* For a workload with a request, wait for the context switch so the
	 * request is really completed.
	 * A workload without a request is completed directly.
	 */
	if (workload->req) {
		struct drm_i915_private *dev_priv =
			workload->vgpu->gvt->dev_priv;
		struct intel_engine_cs *engine =
			dev_priv->engine[workload->ring_id];
		wait_event(workload->shadow_ctx_status_wq,
			   !atomic_read(&workload->shadow_ctx_active));

		/* If this request caused a GPU hang, req->fence.error will
		 * be set to -EIO. Propagate -EIO into the workload status so
		 * that no context switch interrupt is injected into the
		 * guest for a hung request.
		 */
		if (likely(workload->status == -EINPROGRESS)) {
			if (workload->req->fence.error == -EIO)
				workload->status = -EIO;
			else
				workload->status = 0;
		}

		i915_gem_request_put(fetch_and_zero(&workload->req));

		if (!workload->status && !(vgpu->resetting_eng &
					   ENGINE_MASK(ring_id))) {
			update_guest_context(workload);

			for_each_set_bit(event, workload->pending_events,
					 INTEL_GVT_EVENT_MAX)
				intel_vgpu_trigger_virtual_event(vgpu, event);
		}
		mutex_lock(&dev_priv->drm.struct_mutex);
		/* unpin shadow ctx as the shadow_ctx update is done */
		engine->context_unpin(engine, s->shadow_ctx);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
			ring_id, workload, workload->status);

	scheduler->current_workload[ring_id] = NULL;

	list_del_init(&workload->list);

	if (!workload->status) {
		release_shadow_batch_buffer(workload);
		release_shadow_wa_ctx(&workload->wa_ctx);
	}

	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
		/* If workload->status is not successful, the HW GPU hung or
		 * something went wrong with i915/GVT, and GVT won't inject a
		 * context switch interrupt into the guest. To the guest this
		 * error is therefore effectively a vGPU hang, so we should
		 * emulate a vGPU hang. If there are pending workloads that
		 * were already submitted from the guest, clean them up the
		 * way the HW GPU would.
		 *
		 * If we are in the middle of an engine reset, the pending
		 * workloads won't be submitted to the HW GPU and will be
		 * cleaned up later during the reset process, so cleaning up
		 * the workloads here has no further impact.
		 */
		clean_workloads(vgpu, ENGINE_MASK(ring_id));
	}

	workload->complete(workload);

	atomic_dec(&s->running_workload_num);
	wake_up(&scheduler->workload_complete_wq);

	if (gvt->scheduler.need_reschedule)
		intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);

	mutex_unlock(&gvt->lock);
}

struct workload_thread_param {
	struct intel_gvt *gvt;
	int ring_id;
};

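/*
 * Per-ring kernel thread: sleeps until pick_next_workload() yields work,
 * dispatches it under gvt->lock (taking forcewake on Gen9 parts where
 * needed), waits for the request to complete, then completes the workload
 * and escalates unhealthy-VM errors into failsafe mode.
 */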
static int workload_thread(void *priv)
{
	struct workload_thread_param *p = (struct workload_thread_param *)priv;
	struct intel_gvt *gvt = p->gvt;
	int ring_id = p->ring_id;
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_vgpu_workload *workload = NULL;
	struct intel_vgpu *vgpu = NULL;
	int ret;
	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
			|| IS_KABYLAKE(gvt->dev_priv);
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	kfree(p);

	gvt_dbg_core("workload thread for ring %d started\n", ring_id);

	while (!kthread_should_stop()) {
		add_wait_queue(&scheduler->waitq[ring_id], &wait);
		do {
			workload = pick_next_workload(gvt, ring_id);
			if (workload)
				break;
			wait_woken(&wait, TASK_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
		} while (!kthread_should_stop());
		remove_wait_queue(&scheduler->waitq[ring_id], &wait);

		if (!workload)
			break;

		gvt_dbg_sched("ring id %d next workload %p vgpu %d\n",
				workload->ring_id, workload,
				workload->vgpu->id);

		intel_runtime_pm_get(gvt->dev_priv);

		gvt_dbg_sched("ring id %d will dispatch workload %p\n",
				workload->ring_id, workload);

		if (need_force_wake)
			intel_uncore_forcewake_get(gvt->dev_priv,
					FORCEWAKE_ALL);

		mutex_lock(&gvt->lock);
		ret = dispatch_workload(workload);
		mutex_unlock(&gvt->lock);

		if (ret) {
			vgpu = workload->vgpu;
			gvt_vgpu_err("fail to dispatch workload, skip\n");
			goto complete;
		}

		gvt_dbg_sched("ring id %d wait workload %p\n",
				workload->ring_id, workload);
		i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);

complete:
		gvt_dbg_sched("will complete workload %p, status: %d\n",
				workload, workload->status);

		complete_current_workload(gvt, ring_id);

		if (need_force_wake)
			intel_uncore_forcewake_put(gvt->dev_priv,
					FORCEWAKE_ALL);

		intel_runtime_pm_put(gvt->dev_priv);
		if (ret && (vgpu_is_vm_unhealthy(ret)))
			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
	}
	return 0;
}

void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;

	if (atomic_read(&s->running_workload_num)) {
		gvt_dbg_sched("wait vgpu idle\n");

		wait_event(scheduler->workload_complete_wq,
				!atomic_read(&s->running_workload_num));
	}
}

void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_engine_cs *engine;
	enum intel_engine_id i;

	gvt_dbg_core("clean workload scheduler\n");

	for_each_engine(engine, gvt->dev_priv, i) {
		atomic_notifier_chain_unregister(
				&engine->context_status_notifier,
				&gvt->shadow_ctx_notifier_block[i]);
		kthread_stop(scheduler->thread[i]);
	}
}

int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct workload_thread_param *param = NULL;
	struct intel_engine_cs *engine;
	enum intel_engine_id i;
	int ret;

	gvt_dbg_core("init workload scheduler\n");

	init_waitqueue_head(&scheduler->workload_complete_wq);

	for_each_engine(engine, gvt->dev_priv, i) {
		init_waitqueue_head(&scheduler->waitq[i]);

		param = kzalloc(sizeof(*param), GFP_KERNEL);
		if (!param) {
			ret = -ENOMEM;
			goto err;
		}

		param->gvt = gvt;
		param->ring_id = i;

		scheduler->thread[i] = kthread_run(workload_thread, param,
			"gvt workload %d", i);
		if (IS_ERR(scheduler->thread[i])) {
			gvt_err("fail to create workload thread\n");
			ret = PTR_ERR(scheduler->thread[i]);
			goto err;
		}

		gvt->shadow_ctx_notifier_block[i].notifier_call =
					shadow_context_status_change;
		atomic_notifier_chain_register(&engine->context_status_notifier,
					&gvt->shadow_ctx_notifier_block[i]);
	}
	return 0;
err:
	intel_gvt_clean_workload_scheduler(gvt);
	kfree(param);
	param = NULL;
	return ret;
}

/**
 * intel_vgpu_clean_submission - free submission-related resources for vGPU
 * @vgpu: a vGPU
 *
 * This function is called when a vGPU is being destroyed.
 *
 */
void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_submission *s = &vgpu->submission;

	intel_vgpu_select_submission_ops(vgpu, 0);
	i915_gem_context_put(s->shadow_ctx);
	kmem_cache_destroy(s->workloads);
}

/**
 * intel_vgpu_reset_submission - reset submission-related resources for vGPU
 * @vgpu: a vGPU
 * @engine_mask: engines expected to be reset
 *
 * This function is called when a vGPU is being reset.
 *
 */
void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	struct intel_vgpu_submission *s = &vgpu->submission;

	if (!s->active)
		return;

	clean_workloads(vgpu, engine_mask);
	s->ops->reset(vgpu, engine_mask);
}

/**
 * intel_vgpu_setup_submission - setup submission-related resources for vGPU
 * @vgpu: a vGPU
 *
 * This function is called when a vGPU is being created.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 *
 */
int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	enum intel_engine_id i;
	struct intel_engine_cs *engine;
	int ret;

	s->shadow_ctx = i915_gem_context_create_gvt(
			&vgpu->gvt->dev_priv->drm);
	if (IS_ERR(s->shadow_ctx))
		return PTR_ERR(s->shadow_ctx);

	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);

	s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
			sizeof(struct intel_vgpu_workload), 0,
			SLAB_HWCACHE_ALIGN,
			NULL);

	if (!s->workloads) {
		ret = -ENOMEM;
		goto out_shadow_ctx;
	}

	for_each_engine(engine, vgpu->gvt->dev_priv, i)
		INIT_LIST_HEAD(&s->workload_q_head[i]);

	atomic_set(&s->running_workload_num, 0);
	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);

	return 0;

out_shadow_ctx:
	i915_gem_context_put(s->shadow_ctx);
	return ret;
}

/**
 * intel_vgpu_select_submission_ops - select virtual submission interface
 * @vgpu: a vGPU
 * @interface: expected vGPU virtual submission interface
 *
 * This function is called when the guest configures the submission interface.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 *
 */
int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
				     unsigned int interface)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	const struct intel_vgpu_submission_ops *ops[] = {
		[INTEL_VGPU_EXECLIST_SUBMISSION] =
			&intel_vgpu_execlist_submission_ops,
	};
	int ret;

	if (WARN_ON(interface >= ARRAY_SIZE(ops)))
		return -EINVAL;

	if (s->active) {
		s->ops->clean(vgpu);
		s->active = false;
		gvt_dbg_core("vgpu%d: de-select ops [ %s ]\n",
				vgpu->id, s->ops->name);
	}

	if (interface == 0) {
		s->ops = NULL;
		s->virtual_submission_interface = 0;
		gvt_dbg_core("vgpu%d: no submission ops\n", vgpu->id);
		return 0;
	}

	ret = ops[interface]->init(vgpu);
	if (ret)
		return ret;

	s->ops = ops[interface];
	s->virtual_submission_interface = interface;
	s->active = true;

	gvt_dbg_core("vgpu%d: activate ops [ %s ]\n",
			vgpu->id, s->ops->name);

	return 0;
}

/**
 * intel_vgpu_destroy_workload - destroy a vGPU workload
 * @workload: workload to destroy
 *
 * This function is called when destroying a vGPU workload.
 *
 */
void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu_submission *s = &workload->vgpu->submission;

	if (workload->shadow_mm)
		intel_gvt_mm_unreference(workload->shadow_mm);

	kmem_cache_free(s->workloads, workload);
}

static struct intel_vgpu_workload *
alloc_workload(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload;

	workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL);
	if (!workload)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&workload->list);
	INIT_LIST_HEAD(&workload->shadow_bb);

	init_waitqueue_head(&workload->shadow_ctx_status_wq);
	atomic_set(&workload->shadow_ctx_active, 0);

	workload->status = -EINPROGRESS;
	workload->shadowed = false;
	workload->vgpu = vgpu;

	return workload;
}

#define RING_CTX_OFF(x) \
	offsetof(struct execlist_ring_context, x)

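/*
 * Read the eight PDP root-pointer dwords from the guest ring context,
 * starting at pdp3_UDW, into pdp[] in reversed (lowest-first) order.
 */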
static void read_guest_pdps(struct intel_vgpu *vgpu,
		u64 ring_context_gpa, u32 pdp[8])
{
	u64 gpa;
	int i;

	gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);

	for (i = 0; i < 8; i++)
		intel_gvt_hypervisor_read_gpa(vgpu,
				gpa + i * 8, &pdp[7 - i], 4);
}

static int prepare_mm(struct intel_vgpu_workload *workload)
{
	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
	struct intel_vgpu_mm *mm;
	struct intel_vgpu *vgpu = workload->vgpu;
	int page_table_level;
	u32 pdp[8];

	if (desc->addressing_mode == 1) { /* legacy 32-bit */
		page_table_level = 3;
	} else if (desc->addressing_mode == 3) { /* legacy 64-bit */
		page_table_level = 4;
	} else {
		gvt_vgpu_err("Advanced Context mode (SVM) is not supported!\n");
		return -EINVAL;
	}

	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);

	mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
	if (mm) {
		intel_gvt_mm_reference(mm);
	} else {
		mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
				pdp, page_table_level, 0);
		if (IS_ERR(mm)) {
			gvt_vgpu_err("fail to create mm object.\n");
			return PTR_ERR(mm);
		}
	}
	workload->shadow_mm = mm;
	return 0;
}

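/*
 * A new submission that reuses the previous context (same context_id and
 * LRCA) continues in the same ring buffer, so intel_vgpu_create_workload()
 * below uses the last workload's tail as the new head instead of the guest
 * context's head pointer, which may not be updated yet at that point.
 */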
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))

#define get_last_workload(q) \
	(list_empty(q) ? NULL : container_of(q->prev, \
	struct intel_vgpu_workload, list))

/**
 * intel_vgpu_create_workload - create a vGPU workload
 * @vgpu: a vGPU
 * @ring_id: ring index
 * @desc: a guest context descriptor
 *
 * This function is called when creating a vGPU workload.
 *
 * Returns:
 * struct intel_vgpu_workload * on success, negative error code in
 * pointer if failed.
 *
 */
struct intel_vgpu_workload *
intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
			   struct execlist_ctx_descriptor_format *desc)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct list_head *q = workload_q_head(vgpu, ring_id);
	struct intel_vgpu_workload *last_workload = get_last_workload(q);
	struct intel_vgpu_workload *workload = NULL;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	u64 ring_context_gpa;
	u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
	int ret;

	ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
			(u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT));
	if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
		gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
		return ERR_PTR(-EINVAL);
	}

	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ring_header.val), &head, 4);

	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ring_tail.val), &tail, 4);

	head &= RB_HEAD_OFF_MASK;
	tail &= RB_TAIL_OFF_MASK;

	if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
		gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
		gvt_dbg_el("ctx head %x real head %lx\n", head,
				last_workload->rb_tail);
		/*
		 * cannot use the guest context head pointer here,
		 * as it might not be updated at this time
		 */
		head = last_workload->rb_tail;
	}

	gvt_dbg_el("ring id %d begin a new workload\n", ring_id);

	/* record some ring buffer register values for scan and shadow */
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rb_start.val), &start, 4);
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);

	workload = alloc_workload(vgpu);
	if (IS_ERR(workload))
		return workload;

	workload->ring_id = ring_id;
	workload->ctx_desc = *desc;
	workload->ring_context_gpa = ring_context_gpa;
	workload->rb_head = head;
	workload->rb_tail = tail;
	workload->rb_start = start;
	workload->rb_ctl = ctl;

	if (ring_id == RCS) {
		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);

		workload->wa_ctx.indirect_ctx.guest_gma =
			indirect_ctx & INDIRECT_CTX_ADDR_MASK;
		workload->wa_ctx.indirect_ctx.size =
			(indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
			CACHELINE_BYTES;
		workload->wa_ctx.per_ctx.guest_gma =
			per_ctx & PER_CTX_ADDR_MASK;
		workload->wa_ctx.per_ctx.valid = per_ctx & 1;
	}

	gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
			workload, ring_id, head, tail, start, ctl);

	ret = prepare_mm(workload);
	if (ret) {
		kmem_cache_free(s->workloads, workload);
		return ERR_PTR(ret);
	}

	/* Only scan and shadow the first workload in the queue
	 * as there is only one pre-allocated buf-obj for shadow.
	 */
	if (list_empty(workload_q_head(vgpu, ring_id))) {
		intel_runtime_pm_get(dev_priv);
		mutex_lock(&dev_priv->drm.struct_mutex);
		ret = intel_gvt_scan_and_shadow_workload(workload);
		mutex_unlock(&dev_priv->drm.struct_mutex);
		intel_runtime_pm_put(dev_priv);
	}

	if (ret && (vgpu_is_vm_unhealthy(ret))) {
		enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
		intel_vgpu_destroy_workload(workload);
		return ERR_PTR(ret);
	}

	return workload;
}