drm/amdkfd: switch over to using drm_exec v3
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 83a83ce..dedfee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -27,6 +27,8 @@
 #include <linux/sched/task.h>
 #include <drm/ttm/ttm_tt.h>
 
+#include <drm/drm_exec.h>
+
 #include "amdgpu_object.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_vm.h"
@@ -35,7 +37,9 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
 #include "kfd_smi_events.h"
 
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
@@ -110,13 +114,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
        struct sysinfo si;
        uint64_t mem;
 
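+       /* Bail out if the limits were already computed; this keeps the init
+        * idempotent when it runs more than once (e.g. once per GPU).
+        */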
+       if (kfd_mem_limit.max_system_mem_limit)
+               return;
+
        si_meminfo(&si);
        mem = si.freeram - si.freehigh;
        mem *= si.mem_unit;
 
        spin_lock_init(&kfd_mem_limit.mem_limit_lock);
        kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
-       kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
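+       /* Base the TTM limit on TTM's own pages limit rather than the old
+        * 3/8-of-RAM heuristic, so KFD and TTM account GTT memory against
+        * the same budget.
+        */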
+       kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
        pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
                (kfd_mem_limit.max_system_mem_limit >> 20),
                (kfd_mem_limit.max_ttm_mem_limit >> 20));
@@ -148,16 +155,20 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
 * @size: Size of buffer, in bytes, encapsulated by BO. This should be
  * equivalent to amdgpu_bo_size(BO)
  * @alloc_flag: Flag used in allocating a BO as noted above
+ * @xcp_id: xcp_id is used to look up the XCP from the XCP manager; each
+ * XCP is exposed to the application as one compute node by the driver
  *
- * Return: returns -ENOMEM in case of error, ZERO otherwise
+ * Return:
+ *     returns -ENOMEM in case of error, zero otherwise
  */
 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag)
+               uint64_t size, u32 alloc_flag, int8_t xcp_id)
 {
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
        size_t system_mem_needed, ttm_mem_needed, vram_needed;
        int ret = 0;
+       uint64_t vram_size = 0;
 
        system_mem_needed = 0;
        ttm_mem_needed = 0;
@@ -172,6 +183,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                 * 2M BO chunk.
                 */
                vram_needed = size;
+               /*
+                * For GFX 9.4.3, get the VRAM size from XCP structs
+                */
+               if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+                       return -EINVAL;
+
+               vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
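+               /* On APP APUs the VRAM partition is carved out of system
+                * memory, so charge the allocation against the system and
+                * TTM limits as well.
+                */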
+               if (adev->gmc.is_app_apu) {
+                       system_mem_needed = size;
+                       ttm_mem_needed = size;
+               }
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                system_mem_needed = size;
        } else if (!(alloc_flag &
@@ -191,8 +213,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
             kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
            (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
             kfd_mem_limit.max_ttm_mem_limit) ||
-           (adev && adev->kfd.vram_used + vram_needed >
-            adev->gmc.real_vram_size - reserved_for_pt)) {
+           (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
+            vram_size - reserved_for_pt)) {
                ret = -ENOMEM;
                goto release;
        }
@@ -202,9 +224,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
         */
        WARN_ONCE(vram_needed && !adev,
                  "adev reference can't be null when vram is used");
-       if (adev) {
-               adev->kfd.vram_used += vram_needed;
-               adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+       if (adev && xcp_id >= 0) {
+               adev->kfd.vram_used[xcp_id] += vram_needed;
+               adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+                               vram_needed :
+                               ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
        }
        kfd_mem_limit.system_mem_used += system_mem_needed;
        kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@@ -215,7 +239,7 @@ release:
 }
 
 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag)
+               uint64_t size, u32 alloc_flag, int8_t xcp_id)
 {
        spin_lock(&kfd_mem_limit.mem_limit_lock);
 
@@ -225,9 +249,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                WARN_ONCE(!adev,
                          "adev reference can't be null when alloc mem flags vram is set");
+               if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+                       goto release;
+
                if (adev) {
-                       adev->kfd.vram_used -= size;
-                       adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+                       adev->kfd.vram_used[xcp_id] -= size;
+                       if (adev->gmc.is_app_apu) {
+                               adev->kfd.vram_used_aligned[xcp_id] -= size;
+                               kfd_mem_limit.system_mem_used -= size;
+                               kfd_mem_limit.ttm_mem_used -= size;
+                       } else {
+                               adev->kfd.vram_used_aligned[xcp_id] -=
+                                       ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+                       }
                }
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                kfd_mem_limit.system_mem_used -= size;
@@ -237,8 +271,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
                pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
                goto release;
        }
-       WARN_ONCE(adev && adev->kfd.vram_used < 0,
-                 "KFD VRAM memory accounting unbalanced");
+       WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+                 "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
        WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
                  "KFD TTM memory accounting unbalanced");
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
@@ -254,14 +288,16 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
        u32 alloc_flags = bo->kfd_bo->alloc_flags;
        u64 size = amdgpu_bo_size(bo);
 
-       amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+                                         bo->xcp_id);
 
        kfree(bo->kfd_bo);
 }
 
 /**
- * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
+ * create_dmamap_sg_bo() - Creates an amdgpu_bo object to reflect information
  * about USERPTR or DOORBELL or MMIO BO.
+ *
  * @adev: Device for which dmamap BO is being created
  * @mem: BO of peer device that is being DMA mapped. Provides parameters
  *      in building the dmamap BO
@@ -285,7 +321,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
 
        ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
                        AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
-                       ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+                       ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
 
        amdgpu_bo_unreserve(mem->bo);
 
@@ -527,6 +563,12 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
 {
        struct ttm_operation_ctx ctx = {.interruptible = true};
        struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+       int ret;
+
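+       /* Move the BO to the CPU domain first to invalidate any existing
+        * DMA mappings before the GTT mapping below is (re)created; the
+        * matching dmaunmap is a no-op (see kfd_mem_dmaunmap_dmabuf).
+        */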
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+       ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       if (ret)
+               return ret;
 
        amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
        return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -659,11 +701,10 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
 static void
 kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
 {
-       struct ttm_operation_ctx ctx = {.interruptible = true};
-       struct amdgpu_bo *bo = attachment->bo_va->base.bo;
-
-       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-       ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       /* This is a no-op. We don't want to trigger eviction fences when
+        * unmapping DMABufs. Therefore the invalidation (moving to system
+        * domain) is done in kfd_mem_dmamap_dmabuf.
+        */
 }
 
 /**
@@ -804,7 +845,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
         * if peer device has large BAR. In contrast, access over xGMI is
         * allowed for both small and large BAR configurations of peer device
         */
-       if ((adev != bo_adev) &&
+       if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
            ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -925,28 +966,20 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
                                struct amdkfd_process_info *process_info,
                                bool userptr)
 {
-       struct ttm_validate_buffer *entry = &mem->validate_list;
-       struct amdgpu_bo *bo = mem->bo;
-
-       INIT_LIST_HEAD(&entry->head);
-       entry->num_shared = 1;
-       entry->bo = &bo->tbo;
        mutex_lock(&process_info->lock);
        if (userptr)
-               list_add_tail(&entry->head, &process_info->userptr_valid_list);
+               list_add_tail(&mem->validate_list,
+                             &process_info->userptr_valid_list);
        else
-               list_add_tail(&entry->head, &process_info->kfd_bo_list);
+               list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
        mutex_unlock(&process_info->lock);
 }
 
 static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
                struct amdkfd_process_info *process_info)
 {
-       struct ttm_validate_buffer *bo_list_entry;
-
-       bo_list_entry = &mem->validate_list;
        mutex_lock(&process_info->lock);
-       list_del(&bo_list_entry->head);
+       list_del(&mem->validate_list);
        mutex_unlock(&process_info->lock);
 }
 
@@ -1033,13 +1066,12 @@ out:
  * object can track VM updates.
  */
 struct bo_vm_reservation_context {
-       struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
-       unsigned int n_vms;                 /* Number of VMs reserved       */
-       struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
-       struct ww_acquire_ctx ticket;       /* Reservation ticket           */
-       struct list_head list, duplicates;  /* BO lists                     */
-       struct amdgpu_sync *sync;           /* Pointer to sync object       */
-       bool reserved;                      /* Whether BOs are reserved     */
+       /* DRM execution context for the reservation */
+       struct drm_exec exec;
+       /* Number of VMs reserved */
+       unsigned int n_vms;
+       /* Pointer to sync object */
+       struct amdgpu_sync *sync;
 };
 
 enum bo_vm_match {
@@ -1063,35 +1095,26 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
        WARN_ON(!vm);
 
-       ctx->reserved = false;
        ctx->n_vms = 1;
        ctx->sync = &mem->sync;
-
-       INIT_LIST_HEAD(&ctx->list);
-       INIT_LIST_HEAD(&ctx->duplicates);
-
-       ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
-       if (!ctx->vm_pd)
-               return -ENOMEM;
-
-       ctx->kfd_bo.priority = 0;
-       ctx->kfd_bo.tv.bo = &bo->tbo;
-       ctx->kfd_bo.tv.num_shared = 1;
-       list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
-       amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
-
-       ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-                                    false, &ctx->duplicates);
-       if (ret) {
-               pr_err("Failed to reserve buffers in ttm.\n");
-               kfree(ctx->vm_pd);
-               ctx->vm_pd = NULL;
-               return ret;
+       drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
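+       /* On contention drm_exec unlocks everything taken so far and the
+        * loop body restarts from the top; the trailing 2 is the number of
+        * extra dma-resv fence slots to reserve on the page directory.
+        */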
+       drm_exec_until_all_locked(&ctx->exec) {
+               ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+               drm_exec_retry_on_contention(&ctx->exec);
+               if (unlikely(ret))
+                       goto error;
+
+               ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base);
+               drm_exec_retry_on_contention(&ctx->exec);
+               if (unlikely(ret))
+                       goto error;
        }
-
-       ctx->reserved = true;
        return 0;
+
+error:
+       pr_err("Failed to reserve buffers in ttm.\n");
+       drm_exec_fini(&ctx->exec);
+       return ret;
 }
 
 /**
@@ -1108,63 +1131,39 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
                                struct amdgpu_vm *vm, enum bo_vm_match map_type,
                                struct bo_vm_reservation_context *ctx)
 {
-       struct amdgpu_bo *bo = mem->bo;
        struct kfd_mem_attachment *entry;
-       unsigned int i;
+       struct amdgpu_bo *bo = mem->bo;
        int ret;
 
-       ctx->reserved = false;
-       ctx->n_vms = 0;
-       ctx->vm_pd = NULL;
        ctx->sync = &mem->sync;
+       drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+       drm_exec_until_all_locked(&ctx->exec) {
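+               /* Reset the count on every pass: after contention the loop
+                * body restarts from the top and would otherwise count the
+                * VMs twice.
+                */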
+               ctx->n_vms = 0;
+               list_for_each_entry(entry, &mem->attachments, list) {
+                       if ((vm && vm != entry->bo_va->base.vm) ||
+                               (entry->is_mapped != map_type
+                               && map_type != BO_VM_ALL))
+                               continue;
 
-       INIT_LIST_HEAD(&ctx->list);
-       INIT_LIST_HEAD(&ctx->duplicates);
-
-       list_for_each_entry(entry, &mem->attachments, list) {
-               if ((vm && vm != entry->bo_va->base.vm) ||
-                       (entry->is_mapped != map_type
-                       && map_type != BO_VM_ALL))
-                       continue;
-
-               ctx->n_vms++;
-       }
-
-       if (ctx->n_vms != 0) {
-               ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
-                                    GFP_KERNEL);
-               if (!ctx->vm_pd)
-                       return -ENOMEM;
-       }
-
-       ctx->kfd_bo.priority = 0;
-       ctx->kfd_bo.tv.bo = &bo->tbo;
-       ctx->kfd_bo.tv.num_shared = 1;
-       list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
-       i = 0;
-       list_for_each_entry(entry, &mem->attachments, list) {
-               if ((vm && vm != entry->bo_va->base.vm) ||
-                       (entry->is_mapped != map_type
-                       && map_type != BO_VM_ALL))
-                       continue;
-
-               amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
-                               &ctx->vm_pd[i]);
-               i++;
-       }
+                       ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
+                                               &ctx->exec, 2);
+                       drm_exec_retry_on_contention(&ctx->exec);
+                       if (unlikely(ret))
+                               goto error;
+                       ++ctx->n_vms;
+               }
 
-       ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-                                    false, &ctx->duplicates);
-       if (ret) {
-               pr_err("Failed to reserve buffers in ttm.\n");
-               kfree(ctx->vm_pd);
-               ctx->vm_pd = NULL;
-               return ret;
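+               /* Unlike drm_exec_lock_obj in reserve_bo_and_vm, prepare_obj
+                * also reserves a dma-resv fence slot on the BO.
+                */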
+               ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+               drm_exec_retry_on_contention(&ctx->exec);
+               if (unlikely(ret))
+                       goto error;
        }
-
-       ctx->reserved = true;
        return 0;
+
+error:
+       pr_err("Failed to reserve buffers in ttm.\n");
+       drm_exec_fini(&ctx->exec);
+       return ret;
 }
 
 /**
@@ -1185,15 +1184,8 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
        if (wait)
                ret = amdgpu_sync_wait(ctx->sync, intr);
 
-       if (ctx->reserved)
-               ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
-       kfree(ctx->vm_pd);
-
+       drm_exec_fini(&ctx->exec);
        ctx->sync = NULL;
-
-       ctx->reserved = false;
-       ctx->vm_pd = NULL;
-
        return ret;
 }
 
@@ -1599,23 +1591,42 @@ out_unlock:
        return ret;
 }
 
-size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+                                         uint8_t xcp_id)
 {
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
        ssize_t available;
+       uint64_t vram_available, system_mem_available, ttm_mem_available;
 
        spin_lock(&kfd_mem_limit.mem_limit_lock);
-       available = adev->gmc.real_vram_size
-               - adev->kfd.vram_used_aligned
+       vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+               - adev->kfd.vram_used_aligned[xcp_id]
                - atomic64_read(&adev->vram_pin_size)
                - reserved_for_pt;
+
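+       /* On APP APUs VRAM is backed by system memory, so the effective
+        * availability is the tightest of the system, TTM and partition
+        * limits.
+        */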
+       if (adev->gmc.is_app_apu) {
+               system_mem_available = no_system_mem_limit ?
+                                       kfd_mem_limit.max_system_mem_limit :
+                                       kfd_mem_limit.max_system_mem_limit -
+                                       kfd_mem_limit.system_mem_used;
+
+               ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+                               kfd_mem_limit.ttm_mem_used;
+
+               available = min3(system_mem_available, ttm_mem_available,
+                                vram_available);
+               available = ALIGN_DOWN(available, PAGE_SIZE);
+       } else {
+               available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+       }
+
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
 
        if (available < 0)
                available = 0;
 
-       return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
+       return available;
 }
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
@@ -1624,6 +1635,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                uint64_t *offset, uint32_t flags, bool criu_resume)
 {
        struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+       struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
        enum ttm_bo_type bo_type = ttm_bo_type_device;
        struct sg_table *sg = NULL;
        uint64_t user_addr = 0;
@@ -1631,6 +1643,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        struct drm_gem_object *gobj = NULL;
        u32 domain, alloc_domain;
        uint64_t aligned_size;
+       int8_t xcp_id = -1;
        u64 alloc_flags;
        int ret;
 
@@ -1639,9 +1652,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
         */
        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
-               alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
-               alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+
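+               /* APP APUs have no dedicated VRAM; VRAM requests are
+                * satisfied from the GTT domain instead.
+                */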
+               if (adev->gmc.is_app_apu) {
+                       domain = AMDGPU_GEM_DOMAIN_GTT;
+                       alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+                       alloc_flags = 0;
+               } else {
+                       alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+                       alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+               }
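+               /* An fpriv->xcp_id of ~0 apparently means the process is not
+                * bound to a specific partition; fall back to partition 0.
+                */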
+               xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id;
        } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
                domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
                alloc_flags = 0;
@@ -1693,17 +1714,19 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        amdgpu_sync_create(&(*mem)->sync);
 
-       ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
+       ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+                                             xcp_id);
        if (ret) {
                pr_debug("Insufficient memory\n");
                goto err_reserve_limit;
        }
 
-       pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
-                       va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));
+       pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
+                va, (*mem)->aql_queue ? size << 1 : size,
+                domain_string(alloc_domain), xcp_id);
 
        ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
-                                      bo_type, NULL, &gobj);
+                                      bo_type, NULL, &gobj, xcp_id + 1);
        if (ret) {
                pr_debug("Failed to create BO on domain %s. ret %d\n",
                         domain_string(alloc_domain), ret);
@@ -1728,6 +1751,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        (*mem)->domain = domain;
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
+
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
        if (user_addr) {
@@ -1759,7 +1783,7 @@ err_node_allow:
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
 err_bo_create:
-       amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
 err_reserve_limit:
        mutex_destroy(&(*mem)->lock);
        if (gobj)
@@ -1783,7 +1807,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        bool use_release_notifier = (mem->bo->kfd_bo == mem);
        struct kfd_mem_attachment *entry, *tmp;
        struct bo_vm_reservation_context ctx;
-       struct ttm_validate_buffer *bo_list_entry;
        unsigned int mapped_to_gpu_memory;
        int ret;
        bool is_imported = false;
@@ -1811,9 +1834,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        }
 
        /* Make sure restore workers don't access the BO any more */
-       bo_list_entry = &mem->validate_list;
        mutex_lock(&process_info->lock);
-       list_del(&bo_list_entry->head);
+       list_del(&mem->validate_list);
        mutex_unlock(&process_info->lock);
 
        /* Cleanup user pages and MMU notifiers */
@@ -1855,11 +1877,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        }
 
        /* Update the size of the BO being freed if it was allocated from
-        * VRAM and is not imported.
+        * VRAM and is not imported. For APP APUs, VRAM allocations are done
+        * in the GTT domain.
         */
        if (size) {
-               if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
-                   (!is_imported))
+               if (!is_imported &&
+                  (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
+                  (adev->gmc.is_app_apu &&
+                   mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
                        *size = bo_size;
                else
                        *size = 0;
@@ -2282,8 +2307,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
        (*mem)->dmabuf = dma_buf;
        (*mem)->bo = bo;
        (*mem)->va = va;
-       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
                AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
@@ -2376,14 +2402,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
        /* Move all invalidated BOs to the userptr_inval_list */
        list_for_each_entry_safe(mem, tmp_mem,
                                 &process_info->userptr_valid_list,
-                                validate_list.head)
+                                validate_list)
                if (mem->invalid)
-                       list_move_tail(&mem->validate_list.head,
+                       list_move_tail(&mem->validate_list,
                                       &process_info->userptr_inval_list);
 
        /* Go through userptr_inval_list and update any invalid user_pages */
        list_for_each_entry(mem, &process_info->userptr_inval_list,
-                           validate_list.head) {
+                           validate_list) {
                invalid = mem->invalid;
                if (!invalid)
                        /* BO hasn't been invalidated since the last
@@ -2445,7 +2471,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
                        ret = -EAGAIN;
                        goto unlock_out;
                }
-               mem->invalid = 0;
+               /* Set mem valid if it has an hmm range associated */
+               if (mem->range)
+                       mem->invalid = 0;
        }
 
 unlock_out:
@@ -2461,50 +2489,41 @@ unlock_out:
  */
 static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 {
-       struct amdgpu_bo_list_entry *pd_bo_list_entries;
-       struct list_head resv_list, duplicates;
-       struct ww_acquire_ctx ticket;
+       struct ttm_operation_ctx ctx = { false, false };
        struct amdgpu_sync sync;
+       struct drm_exec exec;
 
        struct amdgpu_vm *peer_vm;
        struct kgd_mem *mem, *tmp_mem;
        struct amdgpu_bo *bo;
-       struct ttm_operation_ctx ctx = { false, false };
-       int i, ret;
-
-       pd_bo_list_entries = kcalloc(process_info->n_vms,
-                                    sizeof(struct amdgpu_bo_list_entry),
-                                    GFP_KERNEL);
-       if (!pd_bo_list_entries) {
-               pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
-               ret = -ENOMEM;
-               goto out_no_mem;
-       }
-
-       INIT_LIST_HEAD(&resv_list);
-       INIT_LIST_HEAD(&duplicates);
+       int ret;
 
-       /* Get all the page directory BOs that need to be reserved */
-       i = 0;
-       list_for_each_entry(peer_vm, &process_info->vm_list_head,
-                           vm_list_node)
-               amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
-                                   &pd_bo_list_entries[i++]);
-       /* Add the userptr_inval_list entries to resv_list */
-       list_for_each_entry(mem, &process_info->userptr_inval_list,
-                           validate_list.head) {
-               list_add_tail(&mem->resv_list.head, &resv_list);
-               mem->resv_list.bo = mem->validate_list.bo;
-               mem->resv_list.num_shared = mem->validate_list.num_shared;
-       }
+       amdgpu_sync_create(&sync);
 
+       drm_exec_init(&exec, 0);
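+       /* No DRM_EXEC_INTERRUPTIBLE_WAIT: this runs from the userptr restore
+        * worker, not from a syscall, so waits need not be interruptible.
+        */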
        /* Reserve all BOs and page tables for validation */
-       ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
-       WARN(!list_empty(&duplicates), "Duplicates should be empty");
-       if (ret)
-               goto out_free;
+       drm_exec_until_all_locked(&exec) {
+               /* Reserve all the page directories */
+               list_for_each_entry(peer_vm, &process_info->vm_list_head,
+                                   vm_list_node) {
+                       ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unreserve_out;
+               }
 
-       amdgpu_sync_create(&sync);
+               /* Reserve the userptr_inval_list entries */
+               list_for_each_entry(mem, &process_info->userptr_inval_list,
+                                   validate_list) {
+                       struct drm_gem_object *gobj;
+
+                       gobj = &mem->bo->tbo.base;
+                       ret = drm_exec_prepare_obj(&exec, gobj, 1);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unreserve_out;
+               }
+       }
 
        ret = process_validate_vms(process_info);
        if (ret)
@@ -2513,7 +2532,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
        /* Validate BOs and update GPUVM page tables */
        list_for_each_entry_safe(mem, tmp_mem,
                                 &process_info->userptr_inval_list,
-                                validate_list.head) {
+                                validate_list) {
                struct kfd_mem_attachment *attachment;
 
                bo = mem->bo;
@@ -2555,12 +2574,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
        ret = process_update_pds(process_info, &sync);
 
 unreserve_out:
-       ttm_eu_backoff_reservation(&ticket, &resv_list);
+       drm_exec_fini(&exec);
        amdgpu_sync_wait(&sync, false);
        amdgpu_sync_free(&sync);
-out_free:
-       kfree(pd_bo_list_entries);
-out_no_mem:
 
        return ret;
 }
@@ -2576,9 +2592,16 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
 
        list_for_each_entry_safe(mem, tmp_mem,
                                 &process_info->userptr_inval_list,
-                                validate_list.head) {
-               bool valid = amdgpu_ttm_tt_get_user_pages_done(
-                               mem->bo->tbo.ttm, mem->range);
+                                validate_list) {
+               bool valid;
+
+               /* Keep mem without an hmm range on the userptr_inval_list */
+               if (!mem->range)
+                       continue;
+
+               /* Only check mem with hmm range associated */
+               valid = amdgpu_ttm_tt_get_user_pages_done(
+                                       mem->bo->tbo.ttm, mem->range);
 
                mem->range = NULL;
                if (!valid) {
@@ -2586,9 +2609,14 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
                        ret = -EAGAIN;
                        continue;
                }
-               WARN(mem->invalid, "Valid BO is marked invalid");
 
-               list_move_tail(&mem->validate_list.head,
+               if (mem->invalid) {
+                       WARN(1, "Valid BO is marked invalid");
+                       ret = -EAGAIN;
+                       continue;
+               }
+
+               list_move_tail(&mem->validate_list,
                               &process_info->userptr_valid_list);
        }
 
@@ -2698,50 +2726,44 @@ unlock_out:
  */
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 {
-       struct amdgpu_bo_list_entry *pd_bo_list;
        struct amdkfd_process_info *process_info = info;
        struct amdgpu_vm *peer_vm;
        struct kgd_mem *mem;
-       struct bo_vm_reservation_context ctx;
        struct amdgpu_amdkfd_fence *new_fence;
-       int ret = 0, i;
        struct list_head duplicate_save;
        struct amdgpu_sync sync_obj;
        unsigned long failed_size = 0;
        unsigned long total_size = 0;
+       struct drm_exec exec;
+       int ret;
 
        INIT_LIST_HEAD(&duplicate_save);
-       INIT_LIST_HEAD(&ctx.list);
-       INIT_LIST_HEAD(&ctx.duplicates);
-
-       pd_bo_list = kcalloc(process_info->n_vms,
-                            sizeof(struct amdgpu_bo_list_entry),
-                            GFP_KERNEL);
-       if (!pd_bo_list)
-               return -ENOMEM;
 
-       i = 0;
        mutex_lock(&process_info->lock);
-       list_for_each_entry(peer_vm, &process_info->vm_list_head,
-                       vm_list_node)
-               amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
-
-       /* Reserve all BOs and page tables/directory. Add all BOs from
-        * kfd_bo_list to ctx.list
-        */
-       list_for_each_entry(mem, &process_info->kfd_bo_list,
-                           validate_list.head) {
 
-               list_add_tail(&mem->resv_list.head, &ctx.list);
-               mem->resv_list.bo = mem->validate_list.bo;
-               mem->resv_list.num_shared = mem->validate_list.num_shared;
-       }
+       drm_exec_init(&exec, 0);
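+       /* One fence slot is reserved per BO below, presumably for the new
+        * eviction fence that gets attached after validation.
+        */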
+       drm_exec_until_all_locked(&exec) {
+               list_for_each_entry(peer_vm, &process_info->vm_list_head,
+                                   vm_list_node) {
+                       ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto ttm_reserve_fail;
+               }
 
-       ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
-                                    false, &duplicate_save);
-       if (ret) {
-               pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
-               goto ttm_reserve_fail;
+               /* Reserve all BOs and page tables/directories. Add all BOs
+                * from kfd_bo_list to the drm_exec locking context.
+                */
+               list_for_each_entry(mem, &process_info->kfd_bo_list,
+                                   validate_list) {
+                       struct drm_gem_object *gobj;
+
+                       gobj = &mem->bo->tbo.base;
+                       ret = drm_exec_prepare_obj(&exec, gobj, 1);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto ttm_reserve_fail;
+               }
        }
 
        amdgpu_sync_create(&sync_obj);
@@ -2759,7 +2781,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 
        /* Validate BOs and map them to GPUVM (update VM page tables). */
        list_for_each_entry(mem, &process_info->kfd_bo_list,
-                           validate_list.head) {
+                           validate_list) {
 
                struct amdgpu_bo *bo = mem->bo;
                uint32_t domain = mem->domain;
@@ -2832,8 +2854,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
        *ef = dma_fence_get(&new_fence->base);
 
        /* Attach new eviction fence to all BOs except pinned ones */
-       list_for_each_entry(mem, &process_info->kfd_bo_list,
-               validate_list.head) {
+       list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
                if (mem->bo->tbo.pin_count)
                        continue;
 
@@ -2852,11 +2873,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
        }
 
 validate_map_fail:
-       ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
        amdgpu_sync_free(&sync_obj);
 ttm_reserve_fail:
+       drm_exec_fini(&exec);
        mutex_unlock(&process_info->lock);
-       kfree(pd_bo_list);
        return ret;
 }