struct xe_vm *vm;
/** @tile: The tile we're building for. */
struct xe_tile *tile;
- /** @cache: Desired cache level for the ptes */
- enum xe_cache_level cache;
/** @default_pte: PTE flag only template. No address is associated */
u64 default_pte;
/** @dma_offset: DMA offset to add to the PTE. */
{
struct xe_pt_stage_bind_walk *xe_walk =
container_of(walk, typeof(*xe_walk), base);
- u16 pat_index = tile_to_xe(xe_walk->tile)->pat.idx[xe_walk->cache];
+ u16 pat_index = xe_walk->vma->pat_index;
struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
struct xe_vm *vm = xe_walk->vm;
struct xe_pt *xe_child;
if (is_devmem) {
xe_walk.default_pte |= XE_PPGTT_PTE_DM;
xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
- xe_walk.cache = XE_CACHE_WB;
- } else {
- if (!xe_vma_has_no_bo(vma) && bo->flags & XE_BO_SCANOUT_BIT)
- xe_walk.cache = XE_CACHE_WT;
- else
- xe_walk.cache = XE_CACHE_WB;
}
+
if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
#include "xe_vm.h"
#include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
+#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
u64 start, u64 end,
bool read_only,
bool is_null,
- u8 tile_mask)
+ u8 tile_mask,
+ u16 pat_index)
{
struct xe_vma *vma;
struct xe_tile *tile;
if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
+ vma->pat_index = pat_index;
+
if (bo) {
struct drm_gpuvm_bo *vm_bo;
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
u64 bo_offset_or_userptr, u64 addr, u64 range,
u32 operation, u32 flags, u8 tile_mask,
- u32 prefetch_region)
+ u32 prefetch_region, u16 pat_index)
{
struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
struct drm_gpuva_ops *ops;
op->map.read_only =
flags & DRM_XE_VM_BIND_FLAG_READONLY;
op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+ op->map.pat_index = pat_index;
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
op->prefetch.region = prefetch_region;
}
}
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
- u8 tile_mask, bool read_only, bool is_null)
+ u8 tile_mask, bool read_only, bool is_null,
+ u16 pat_index)
{
struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
struct xe_vma *vma;
vma = xe_vma_create(vm, bo, op->gem.offset,
op->va.addr, op->va.addr +
op->va.range - 1, read_only, is_null,
- tile_mask);
+ tile_mask, pat_index);
if (bo)
xe_bo_unlock(bo);
vma = new_vma(vm, &op->base.map,
op->tile_mask, op->map.read_only,
- op->map.is_null);
+ op->map.is_null, op->map.pat_index);
if (IS_ERR(vma))
return PTR_ERR(vma);
vma = new_vma(vm, op->base.remap.prev,
op->tile_mask, read_only,
- is_null);
+ is_null, old->pat_index);
if (IS_ERR(vma))
return PTR_ERR(vma);
vma = new_vma(vm, op->base.remap.next,
op->tile_mask, read_only,
- is_null);
+ is_null, old->pat_index);
if (IS_ERR(vma))
return PTR_ERR(vma);
u64 obj_offset = (*bind_ops)[i].obj_offset;
u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+ u16 pat_index = (*bind_ops)[i].pat_index;
+ u16 coh_mode;
+
+ if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+ (*bind_ops)[i].pat_index = pat_index;
+ coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
if (i == 0) {
*async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+ op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
u64 addr = bind_ops[i].addr;
u32 obj = bind_ops[i].obj;
u64 obj_offset = bind_ops[i].obj_offset;
+ u16 pat_index = bind_ops[i].pat_index;
+ u16 coh_mode;
if (!obj)
continue;
goto put_obj;
}
}
+
+ coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ if (bos[i]->cpu_caching) {
+ if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+ bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+ err = -EINVAL;
+ goto put_obj;
+ }
+ } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+ /*
+ * Imported dma-buf from a different device should
+ * require 1way or 2way coherency since we don't know
+ * how it was mapped on the CPU. Just assume it is
+ * potentially cached on the CPU side.
+ */
+ err = -EINVAL;
+ goto put_obj;
+ }
}
if (args->num_syncs) {
u64 obj_offset = bind_ops[i].obj_offset;
u8 tile_mask = bind_ops[i].tile_mask;
u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+ u16 pat_index = bind_ops[i].pat_index;
ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
addr, range, op, flags,
- tile_mask, prefetch_region);
+ tile_mask, prefetch_region,
+ pat_index);
if (IS_ERR(ops[i])) {
err = PTR_ERR(ops[i]);
ops[i] = NULL;
*/
__u32 obj;
+ /**
+ * @pat_index: The platform defined @pat_index to use for this mapping.
+ * The index basically maps to some predefined memory attributes,
+ * including things like caching, coherency, compression etc. The exact
+ * meaning of the pat_index is platform specific and defined in the
+ * Bspec and PRMs. When the KMD sets up the binding the index here is
+ * encoded into the ppGTT PTE.
+ *
+ * For coherency the @pat_index needs to be at least 1way coherent when
+ * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
+ * will extract the coherency mode from the @pat_index and reject if
+ * there is a mismatch (see note below for pre-MTL platforms).
+ *
+ * Note: On pre-MTL platforms there is only a caching mode and no
+ * explicit coherency mode, but on such hardware there is always a
+ * shared-LLC (or it is a dgpu) so all GT memory accesses are coherent with
+ * CPU caches even with the caching mode set as uncached. It's only the
+ * display engine that is incoherent (on dgpu it must be in VRAM which
+ * is always mapped as WC on the CPU). However to keep the uapi somewhat
+ * consistent with newer platforms the KMD groups the different cache
+ * levels into the following coherency buckets on all pre-MTL platforms:
+ *
+ * ppGTT UC -> COH_NONE
+ * ppGTT WC -> COH_NONE
+ * ppGTT WT -> COH_NONE
+ * ppGTT WB -> COH_AT_LEAST_1WAY
+ *
+ * In practice UC/WC/WT should only ever be used for scanout surfaces on
+ * such platforms (or perhaps in general for dma-buf if shared with
+ * another device) since it is only the display engine that is actually
+ * incoherent. Everything else should typically use WB given that we
+ * have a shared-LLC. On MTL+ this completely changes and the HW
+ * defines the coherency mode as part of the @pat_index, where
+ * incoherent GT access is possible.
+ *
+ * Note: For userptr and externally imported dma-buf the kernel expects
+ * either 1WAY or 2WAY for the @pat_index.
+ *
+ * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions
+ * on the @pat_index. For such mappings there is no actual memory being
+ * mapped (the address in the PTE is invalid), so the various PAT memory
+ * attributes likely do not apply. Simply leaving as zero is one
+ * option (still a valid pat_index).
+ */
+ __u16 pat_index;
+
/** @pad: MBZ */
- __u32 pad;
+ __u16 pad;
union {
/**