We really only need memcpy restore for objects that affect the
operability of the migrate context. That is, primarily the page-table
objects of the migrate VM.
Add an object flag, I915_BO_ALLOC_PM_EARLY for objects that need early
restores using memcpy and a way to assign LMEM page-table object flags
to be used by the vms.
Restore objects without this flag with the gpu blitter and only objects
carrying the flag using TTM memcpy.
Initially mark the migrate, gt, gtt and vgpu vms to use this flag, and
defer for a later audit which vms actually need it. Most importantly, user-
allocated vms with pinned page-table objects can be restored using the
blitter.
Performance-wise memcpy restore is probably as fast as gpu restore if not
faster, but using gpu restore will help tackling future restrictions in
mappable LMEM size.
v4:
- Don't mark the aliasing ppgtt page table flags for early resume, but
rather the ggtt page table flags as intended. (Matthew Auld)
- The check for user buffer objects during early resume is pointless, since
they are never marked I915_BO_ALLOC_PM_EARLY. (Matthew Auld)
v5:
- Mark GuC LMEM objects with I915_BO_ALLOC_PM_EARLY to have them restored
before we fire up the migrate context.
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-8-thomas.hellstrom@linux.intel.com
} else if (HAS_FULL_PPGTT(i915)) {
struct i915_ppgtt *ppgtt;
- ppgtt = i915_ppgtt_create(&i915->gt);
+ ppgtt = i915_ppgtt_create(&i915->gt, 0);
if (IS_ERR(ppgtt)) {
drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
if (args->flags)
return -EINVAL;
- ppgtt = i915_ppgtt_create(&i915->gt);
+ ppgtt = i915_ppgtt_create(&i915->gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
#define I915_BO_ALLOC_USER BIT(3)
/* Object is allowed to lose its contents on suspend / resume, even if pinned */
#define I915_BO_ALLOC_PM_VOLATILE BIT(4)
+/* Object needs to be restored early using memcpy during resume */
+#define I915_BO_ALLOC_PM_EARLY BIT(5)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
I915_BO_ALLOC_VOLATILE | \
I915_BO_ALLOC_CPU_CLEAR | \
I915_BO_ALLOC_USER | \
- I915_BO_ALLOC_PM_VOLATILE)
-#define I915_BO_READONLY BIT(5)
-#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not release! */
+ I915_BO_ALLOC_PM_VOLATILE | \
+ I915_BO_ALLOC_PM_EARLY)
+#define I915_BO_READONLY BIT(6)
+#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
/**
* @mem_flags - Mutable placement-related flags
* More objects may have become unpinned as requests were
* retired. Now try to evict again. The gt may be wedged here
* in which case we automatically fall back to memcpy.
+ * We allow also backing up pinned objects that have not been
+ * marked for early recover, and that may contain, for example,
+ * page-tables for the migrate context.
*/
- ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU);
+ ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU |
+ I915_TTM_BACKUP_PINNED);
if (ret)
goto out_recover;
if (pm_apply->allow_gpu && i915_gem_object_evictable(obj))
return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
- if (!pm_apply->backup_pinned)
+ if (!pm_apply->backup_pinned ||
+ (pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_PM_EARLY)))
return 0;
if (obj->flags & I915_BO_ALLOC_PM_VOLATILE)
if (!backup)
return 0;
- if (!pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_USER))
+ if (!pm_apply->allow_gpu && !(obj->flags & I915_BO_ALLOC_PM_EARLY))
return 0;
err = i915_gem_object_lock(backup, apply->ww);
mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
mkwrite_device_info(dev_priv)->ppgtt_size = 48;
- ppgtt = i915_ppgtt_create(&dev_priv->gt);
+ ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_unlock;
mutex_init(&ppgtt->flush);
mutex_init(&ppgtt->pin_mutex);
- ppgtt_init(&ppgtt->base, gt);
+ ppgtt_init(&ppgtt->base, gt, 0);
ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
ppgtt->base.vm.top = 1;
* space.
*
*/
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct i915_ppgtt *ppgtt;
int err;
if (!ppgtt)
return ERR_PTR(-ENOMEM);
- ppgtt_init(ppgtt, gt);
+ ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
struct intel_gt;
enum i915_cache_level;
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
+
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags);
struct i915_ppgtt *ppgtt;
int err;
- ppgtt = i915_ppgtt_create(ggtt->vm.gt);
+ ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
size = gen8_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.cleanup = gen6_gmch_remove;
static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
- return &i915_ppgtt_create(gt)->vm;
+ return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
else
return i915_vm_get(>->ggtt->vm);
}
* used the passed in size for the page size, which should ensure it
* also has the same alignment.
*/
- obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0);
+ obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
+ vm->lmem_pt_obj_flags);
/*
* Ensure all paging structures for this vm share the same dma-resv
* object underneath, with the idea that one object_lock() will lock
u8 pd_shift;
u8 scratch_order;
+ /* Flags used when creating page-table objects for this vm */
+ unsigned long lmem_pt_obj_flags;
+
struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
}
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
int i915_ggtt_probe_hw(struct drm_i915_private *i915);
int i915_ggtt_init_hw(struct drm_i915_private *i915);
int i915_ppgtt_init_hw(struct intel_gt *gt);
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
* TODO: Add support for huge LMEM PTEs
*/
- vm = i915_ppgtt_create(gt);
+ vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
if (IS_ERR(vm))
return ERR_CAST(vm);
}
static struct i915_ppgtt *
-__ppgtt_create(struct intel_gt *gt)
+__ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags)
{
if (GRAPHICS_VER(gt->i915) < 8)
return gen6_ppgtt_create(gt);
else
- return gen8_ppgtt_create(gt);
+ return gen8_ppgtt_create(gt, lmem_pt_obj_flags);
}
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct i915_ppgtt *ppgtt;
- ppgtt = __ppgtt_create(gt);
+ ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags);
if (IS_ERR(ppgtt))
return ppgtt;
return 0;
}
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct drm_i915_private *i915 = gt->i915;
ppgtt->vm.i915 = i915;
ppgtt->vm.dma = i915->drm.dev;
ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+ ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags;
dma_resv_init(&ppgtt->vm._resv);
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
return 0;
- ppgtt = i915_ppgtt_create(gt);
+ ppgtt = i915_ppgtt_create(gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
if (HAS_LMEM(gt->i915))
obj = i915_gem_object_create_lmem(gt->i915, size,
I915_BO_ALLOC_CPU_CLEAR |
- I915_BO_ALLOC_CONTIGUOUS);
+ I915_BO_ALLOC_CONTIGUOUS |
+ I915_BO_ALLOC_PM_EARLY);
else
obj = i915_gem_object_create_shmem(gt->i915, size);
if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
uc_fw->private_data_size = css->private_data_size;
- if (HAS_LMEM(i915))
+ if (HAS_LMEM(i915)) {
obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
- else
+ if (!IS_ERR(obj))
+ obj->flags |= I915_BO_ALLOC_PM_EARLY;
+ } else {
obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+ }
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
enum intel_engine_id i;
int ret;
- ppgtt = i915_ppgtt_create(&i915->gt);
+ ppgtt = i915_ppgtt_create(&i915->gt, I915_BO_ALLOC_PM_EARLY);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
if (!HAS_PPGTT(dev_priv))
return 0;
- ppgtt = i915_ppgtt_create(&dev_priv->gt);
+ ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
if (IS_ERR(file))
return PTR_ERR(file);
- ppgtt = i915_ppgtt_create(&dev_priv->gt);
+ ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_free;