// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_engine.h"

#include <linux/nospec.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_trace.h"
#include "xe_vm.h"
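
/*
 * Low-level engine constructor. Allocates the engine together with one LRC
 * per instance of a parallel submission (width > 1), takes a reference on
 * the VM and inherits the ring and submission backend ops from the GT.
 * Expected to be called with the VM lock held, see xe_engine_create().
 */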
static struct xe_engine *__xe_engine_create(struct xe_device *xe,
					    struct xe_vm *vm,
					    u32 logical_mask,
					    u16 width, struct xe_hw_engine *hwe,
					    u32 flags)
{
	struct xe_engine *e;
	struct xe_gt *gt = hwe->gt;
	int err;
	int i;

	e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	kref_init(&e->refcount);
	e->flags = flags;
	e->hwe = hwe;
	e->gt = gt;
	e->vm = xe_vm_get(vm);
	e->class = hwe->class;
	e->width = width;
	e->logical_mask = logical_mask;
	e->fence_irq = &gt->fence_irq[hwe->class];
	e->ring_ops = gt->ring_ops[hwe->class];
	e->ops = gt->engine_ops;
	INIT_LIST_HEAD(&e->persistent.link);
	INIT_LIST_HEAD(&e->compute.link);
	INIT_LIST_HEAD(&e->multi_gt_link);

	/* FIXME: Wire up to configurable default value */
	e->sched_props.timeslice_us = 1 * 1000;
	e->sched_props.preempt_timeout_us = 640 * 1000;

	if (xe_engine_is_parallel(e)) {
		e->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
		e->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
	}
	if (e->flags & ENGINE_FLAG_VM) {
		e->bind.fence_ctx = dma_fence_context_alloc(1);
		e->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
	}

	for (i = 0; i < width; ++i) {
		err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K);
		if (err)
			goto err_lrc;
	}

	err = e->ops->init(e);
	if (err)
		goto err_lrc;

	return e;

err_lrc:
	for (i = i - 1; i >= 0; --i)
		xe_lrc_finish(e->lrc + i);
	kfree(e);
	return ERR_PTR(err);
}
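
/*
 * Locked wrapper around __xe_engine_create(): the VM lock is taken around
 * engine construction while the per-instance LRCs are initialized against
 * the VM.
 */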
struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
				   u32 logical_mask, u16 width,
				   struct xe_hw_engine *hwe, u32 flags)
{
	struct ww_acquire_ctx ww;
	struct xe_engine *e;
	int err;

	err = xe_vm_lock(vm, &ww, 0, true);
	if (err)
		return ERR_PTR(err);
	e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags);
	xe_vm_unlock(vm, &ww);

	return e;
}
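
/*
 * Create an engine that can be placed on any non-reserved hardware engine
 * of @class on @gt: the logical mask is built from every matching instance
 * and the first match is used as the primary hardware engine.
 */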
struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
					 struct xe_vm *vm,
					 enum xe_engine_class class, u32 flags)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags);
}

void xe_engine_destroy(struct kref *ref)
{
	struct xe_engine *e = container_of(ref, struct xe_engine, refcount);
	struct xe_engine *engine, *next;

	if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) {
		list_for_each_entry_safe(engine, next, &e->multi_gt_list,
					 multi_gt_link)
			xe_engine_put(engine);
	}

	e->ops->fini(e);
}
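
/*
 * Final teardown once the last reference is gone: release the LRCs, drop
 * the VM reference taken at creation and free the engine. Typically reached
 * via the submission backend's fini op once pending work has completed.
 */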
void xe_engine_fini(struct xe_engine *e)
{
	int i;

	for (i = 0; i < e->width; ++i)
		xe_lrc_finish(e->lrc + i);
	if (e->vm)
		xe_vm_put(e->vm);
	kfree(e);
}

struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
{
	struct xe_engine *e;

	mutex_lock(&xef->engine.lock);
	e = xa_load(&xef->engine.xa, id);
	if (e)
		xe_engine_get(e);
	mutex_unlock(&xef->engine.lock);

	return e;
}

static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
			       u64 value, bool create)
{
	if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH &&
			 !capable(CAP_SYS_NICE)))
		return -EPERM;

	return e->ops->set_priority(e, value);
}

static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e,
				u64 value, bool create)
{
	if (!capable(CAP_SYS_NICE))
		return -EPERM;

	return e->ops->set_timeslice(e, value);
}

static int engine_set_preemption_timeout(struct xe_device *xe,
					 struct xe_engine *e, u64 value,
					 bool create)
{
	if (!capable(CAP_SYS_NICE))
		return -EPERM;

	return e->ops->set_preempt_timeout(e, value);
}

static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
				   u64 value, bool create)
{
	if (XE_IOCTL_ERR(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM))
		return -EINVAL;

	if (value) {
		struct xe_vm *vm = e->vm;
		int err;

		if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm)))
			return -EOPNOTSUPP;

		if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm)))
			return -EOPNOTSUPP;

		if (XE_IOCTL_ERR(xe, e->width != 1))
			return -EINVAL;

		e->compute.context = dma_fence_context_alloc(1);
		spin_lock_init(&e->compute.lock);

		err = xe_vm_add_compute_engine(vm, e);
		if (XE_IOCTL_ERR(xe, err))
			return err;

		e->flags |= ENGINE_FLAG_COMPUTE_MODE;
		e->flags &= ~ENGINE_FLAG_PERSISTENT;
	}

	return 0;
}

static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
				  u64 value, bool create)
{
	if (XE_IOCTL_ERR(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
		return -EINVAL;

	if (value)
		e->flags |= ENGINE_FLAG_PERSISTENT;
	else
		e->flags &= ~ENGINE_FLAG_PERSISTENT;

	return 0;
}

static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
				  u64 value, bool create)
{
	if (XE_IOCTL_ERR(xe, !create))
		return -EINVAL;

	if (!capable(CAP_SYS_NICE))
		return -EPERM;

	return e->ops->set_job_timeout(e, value);
}

static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
				  u64 value, bool create)
{
	if (XE_IOCTL_ERR(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
		return -EINVAL;

	e->usm.acc_trigger = value;

	return 0;
}

static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
				 u64 value, bool create)
{
	if (XE_IOCTL_ERR(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
		return -EINVAL;

	e->usm.acc_notify = value;

	return 0;
}

static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
				      u64 value, bool create)
{
	if (XE_IOCTL_ERR(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
		return -EINVAL;

	e->usm.acc_granularity = value;

	return 0;
}
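
/*
 * All property setters share one signature so they can be dispatched from
 * the table below, both at engine-create time via extensions (create ==
 * true) and later through the set_property ioctl (create == false).
 */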
typedef int (*xe_engine_set_property_fn)(struct xe_device *xe,
					 struct xe_engine *e,
					 u64 value, bool create);

static const xe_engine_set_property_fn engine_set_property_funcs[] = {
	[XE_ENGINE_SET_PROPERTY_PRIORITY] = engine_set_priority,
	[XE_ENGINE_SET_PROPERTY_TIMESLICE] = engine_set_timeslice,
	[XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
	[XE_ENGINE_SET_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
	[XE_ENGINE_SET_PROPERTY_PERSISTENCE] = engine_set_persistence,
	[XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
	[XE_ENGINE_SET_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
	[XE_ENGINE_SET_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
	[XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
};
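
/*
 * ext.property is user-controlled, so after the bounds check the index is
 * passed through array_index_nospec() to avoid speculative out-of-bounds
 * reads of the function table.
 */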
static int engine_user_ext_set_property(struct xe_device *xe,
					struct xe_engine *e,
					u64 extension,
					bool create)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_engine_set_property ext;
	int err;
	u32 idx;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_ERR(xe, err))
		return -EFAULT;

	if (XE_IOCTL_ERR(xe, ext.property >=
			 ARRAY_SIZE(engine_set_property_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
	return engine_set_property_funcs[idx](xe, e, ext.value, create);
}

typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe,
					   struct xe_engine *e,
					   u64 extension,
					   bool create);

static const xe_engine_user_extension_fn engine_user_extension_funcs[] = {
	[XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS	16
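
/*
 * Walk the user-extension chain. Each extension names a handler in
 * engine_user_extension_funcs[]; the recursion is bounded by
 * MAX_USER_EXTENSIONS so a malicious chain cannot recurse without limit.
 */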
static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
				  u64 extensions, int ext_number, bool create)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_ERR(xe, err))
		return -EFAULT;

	if (XE_IOCTL_ERR(xe, ext.name >=
			 ARRAY_SIZE(engine_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(engine_user_extension_funcs));
	err = engine_user_extension_funcs[idx](xe, e, extensions, create);
	if (XE_IOCTL_ERR(xe, err))
		return err;

	if (ext.next_extension)
		return engine_user_extensions(xe, e, ext.next_extension,
					      ++ext_number, create);

	return 0;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};
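
/*
 * Resolve a user-supplied class/gt/instance triplet to a hardware engine.
 * The class index is clamped with array_index_nospec() before it is used
 * to index the translation table above.
 */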
static struct xe_hw_engine *
find_hw_engine(struct xe_device *xe,
	       struct drm_xe_engine_class_instance eci)
{
	u32 idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (eci.gt_id >= xe->info.tile_count)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}
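
/*
 * VM_BIND engines are restricted to a single placement on a single copy
 * engine instance: build the logical mask from every non-reserved copy
 * engine on the GT after forcing the requested class to COPY.
 */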
static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt,
				    struct drm_xe_engine_class_instance *eci,
				    u16 width, u16 num_placements)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	if (XE_IOCTL_ERR(xe, width != 1))
		return 0;
	if (XE_IOCTL_ERR(xe, num_placements != 1))
		return 0;
	if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0))
		return 0;

	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class ==
		    user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
			logical_mask |= BIT(hwe->logical_instance);
	}

	return logical_mask;
}
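
/*
 * Validate the user-supplied width x num_placements array of engine
 * instances and turn it into a logical mask. All instances must live on
 * the same GT and share an engine class, and for parallel submission the
 * placements must be logically contiguous.
 */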
static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = find_hw_engine(xe, eci[n]);
			if (XE_IOCTL_ERR(xe, !hwe))
				return 0;

			if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_ERR(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}
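
/*
 * Engine create ioctl. Two paths: DRM_XE_ENGINE_CLASS_VM_BIND creates one
 * bind engine per non-media GT on the migrate VM and chains the extra ones
 * off the first via multi_gt_list / multi_gt_link; any other class creates
 * a single engine bound to the VM supplied by userspace.
 */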
int xe_engine_create_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_engine_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm, *migrate_vm;
	struct xe_gt *gt;
	struct xe_engine *e = NULL;
	u32 logical_mask;
	u32 id;
	int len;
	int err;

	if (XE_IOCTL_ERR(xe, args->flags))
		return -EINVAL;

	len = args->width * args->num_placements;
	if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = __copy_from_user(eci, user_eci,
			       sizeof(struct drm_xe_engine_class_instance) *
			       len);
	if (XE_IOCTL_ERR(xe, err))
		return -EFAULT;

	if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
		return -EINVAL;

	xe_pm_runtime_get(xe);

	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		for_each_gt(gt, xe, id) {
			struct xe_engine *new;

			if (xe_gt_is_media_type(gt))
				continue;

			eci[0].gt_id = gt->info.id;
			logical_mask = bind_engine_logical_mask(xe, gt, eci,
								args->width,
								args->num_placements);
			if (XE_IOCTL_ERR(xe, !logical_mask)) {
				err = -EINVAL;
				goto put_rpm;
			}

			hwe = find_hw_engine(xe, eci[0]);
			if (XE_IOCTL_ERR(xe, !hwe)) {
				err = -EINVAL;
				goto put_rpm;
			}

			migrate_vm = xe_migrate_get_vm(gt->migrate);
			new = xe_engine_create(xe, migrate_vm, logical_mask,
					       args->width, hwe,
					       ENGINE_FLAG_PERSISTENT |
					       ENGINE_FLAG_VM |
					       (id ?
						ENGINE_FLAG_BIND_ENGINE_CHILD :
						0));
			xe_vm_put(migrate_vm);
			if (IS_ERR(new)) {
				err = PTR_ERR(new);
				if (e)
					goto put_engine;
				goto put_rpm;
			}
			if (id == 0)
				e = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &e->multi_gt_link);
		}
	} else {
		gt = xe_device_get_gt(xe, eci[0].gt_id);
		logical_mask = calc_validate_logical_mask(xe, gt, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_ERR(xe, !logical_mask)) {
			err = -EINVAL;
			goto put_rpm;
		}

		hwe = find_hw_engine(xe, eci[0]);
		if (XE_IOCTL_ERR(xe, !hwe)) {
			err = -EINVAL;
			goto put_rpm;
		}

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_ERR(xe, !vm)) {
			err = -ENOENT;
			goto put_rpm;
		}

		e = xe_engine_create(xe, vm, logical_mask,
				     args->width, hwe, ENGINE_FLAG_PERSISTENT);
		xe_vm_put(vm);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			goto put_rpm;
		}
	}

	if (args->extensions) {
		err = engine_user_extensions(xe, e, args->extensions, 0, true);
		if (XE_IOCTL_ERR(xe, err))
			goto put_engine;
	}

	if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
			 !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
		err = -EOPNOTSUPP;
		goto put_engine;
	}

	e->persistent.xef = xef;

	mutex_lock(&xef->engine.lock);
	err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL);
	mutex_unlock(&xef->engine.lock);
	if (err)
		goto put_engine;

	args->engine_id = id;

	return 0;

put_engine:
	xe_engine_kill(e);
	xe_engine_put(e);
put_rpm:
	xe_pm_runtime_put(xe);
	return err;
}

int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_engine_get_property *args = data;
	struct xe_engine *e;

	mutex_lock(&xef->engine.lock);
	e = xa_load(&xef->engine.xa, args->engine_id);
	mutex_unlock(&xef->engine.lock);

	if (XE_IOCTL_ERR(xe, !e))
		return -ENOENT;

	switch (args->property) {
	case XE_ENGINE_GET_PROPERTY_BAN:
		args->value = !!(e->flags & ENGINE_FLAG_BANNED);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
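
/*
 * Tear down the compute-mode state of an engine: unlink it from the VM's
 * preempt bookkeeping and, if a preempt fence is still outstanding, enable
 * signaling on it before dropping the reference.
 */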
static void engine_kill_compute(struct xe_engine *e)
{
	if (!xe_vm_in_compute_mode(e->vm))
		return;

	down_write(&e->vm->lock);
	list_del(&e->compute.link);
	--e->vm->preempt.num_engines;
	if (e->compute.pfence) {
		dma_fence_enable_sw_signaling(e->compute.pfence);
		dma_fence_put(e->compute.pfence);
		e->compute.pfence = NULL;
	}
	up_write(&e->vm->lock);
}

/**
 * xe_engine_is_idle() - Whether an engine is idle.
 * @engine: The engine
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the engines, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same engine.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grab the migrate vm->resv, so we have a race there.
 *
 * Return: True if the engine is idle, false otherwise.
 */
bool xe_engine_is_idle(struct xe_engine *engine)
{
	if (XE_WARN_ON(xe_engine_is_parallel(engine)))
		return false;

	return xe_lrc_seqno(&engine->lrc[0]) ==
		engine->lrc[0].fence_ctx.next_seqno - 1;
}
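
/*
 * Kill an engine and every engine chained off it via multi_gt_list,
 * tearing down the compute-mode state of each.
 */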
void xe_engine_kill(struct xe_engine *e)
{
	struct xe_engine *engine = e, *next;

	list_for_each_entry_safe(engine, next, &engine->multi_gt_list,
				 multi_gt_link) {
		e->ops->kill(engine);
		engine_kill_compute(engine);
	}

	e->ops->kill(e);
	engine_kill_compute(e);
}
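
/*
 * Engine destroy ioctl. The engine is removed from the file's xarray right
 * away; non-persistent engines are killed immediately, while persistent
 * ones are handed to the device so they can run to completion first.
 */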
int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_engine_destroy *args = data;
	struct xe_engine *e;

	if (XE_IOCTL_ERR(xe, args->pad))
		return -EINVAL;

	mutex_lock(&xef->engine.lock);
	e = xa_erase(&xef->engine.xa, args->engine_id);
	mutex_unlock(&xef->engine.lock);
	if (XE_IOCTL_ERR(xe, !e))
		return -ENOENT;

	if (!(e->flags & ENGINE_FLAG_PERSISTENT))
		xe_engine_kill(e);
	else
		xe_device_add_persistent_engines(xe, e);

	trace_xe_engine_close(e);
	xe_engine_put(e);
	xe_pm_runtime_put(xe);

	return 0;
}
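
/*
 * Set-property ioctl: dispatch through the same table used at create time,
 * with create == false so that create-only properties are rejected.
 */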
int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_engine_set_property *args = data;
	struct xe_engine *e;
	int ret;
	u32 idx;

	e = xe_engine_lookup(xef, args->engine_id);
	if (XE_IOCTL_ERR(xe, !e))
		return -ENOENT;

	if (XE_IOCTL_ERR(xe, args->property >=
			 ARRAY_SIZE(engine_set_property_funcs))) {
		ret = -EINVAL;
		goto out;
	}

	idx = array_index_nospec(args->property,
				 ARRAY_SIZE(engine_set_property_funcs));
	ret = engine_set_property_funcs[idx](xe, e, args->value, false);
	if (XE_IOCTL_ERR(xe, ret))
		goto out;

	if (args->extensions)
		ret = engine_user_extensions(xe, e, args->extensions, 0,
					     false);
out:
	xe_engine_put(e);

	return ret;
}