drm/amdkfd: switch over to using drm_exec v3
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 83a83ce..dedfee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -27,6 +27,8 @@
 #include <linux/sched/task.h>
 #include <drm/ttm/ttm_tt.h>
 
+#include <drm/drm_exec.h>
+
 #include "amdgpu_object.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_vm.h"
@@ -35,7 +37,9 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
 #include "kfd_smi_events.h"
 
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
@@ -110,13 +114,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
        struct sysinfo si;
        uint64_t mem;
 
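+       /* Bail out if the limits were already computed; this keeps the init
+        * idempotent when it runs more than once (e.g. once per GPU).
+        */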
+       if (kfd_mem_limit.max_system_mem_limit)
+               return;
+
        si_meminfo(&si);
        mem = si.freeram - si.freehigh;
        mem *= si.mem_unit;
 
        spin_lock_init(&kfd_mem_limit.mem_limit_lock);
        kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
-       kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
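+       /* Base the TTM limit on TTM's own pages limit rather than the old
+        * 3/8-of-RAM heuristic, so KFD and TTM account GTT memory against
+        * the same budget.
+        */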
+       kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
        pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
                (kfd_mem_limit.max_system_mem_limit >> 20),
                (kfd_mem_limit.max_ttm_mem_limit >> 20));
@@ -148,16 +155,20 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
 * @size: Size of buffer, in bytes, encapsulated by BO. This should be
  * equivalent to amdgpu_bo_size(BO)
  * @alloc_flag: Flag used in allocating a BO as noted above
+ * @xcp_id: xcp_id is used to look up the XCP from the XCP manager; each
+ * XCP is exposed to the application as one compute node by the driver
  *
- * Return: returns -ENOMEM in case of error, ZERO otherwise
+ * Return:
+ *     returns -ENOMEM in case of error, zero otherwise
  */
 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag)
+               uint64_t size, u32 alloc_flag, int8_t xcp_id)
 {
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
        size_t system_mem_needed, ttm_mem_needed, vram_needed;
        int ret = 0;
+       uint64_t vram_size = 0;
 
        system_mem_needed = 0;
        ttm_mem_needed = 0;
@@ -172,6 +183,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                 * 2M BO chunk.
                 */
                vram_needed = size;
+               /*
+                * For GFX 9.4.3, get the VRAM size from XCP structs
+                */
+               if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+                       return -EINVAL;
+
+               vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
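+               /* On APP APUs the VRAM partition is carved out of system
+                * memory, so charge the allocation against the system and
+                * TTM limits as well.
+                */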
+               if (adev->gmc.is_app_apu) {
+                       system_mem_needed = size;
+                       ttm_mem_needed = size;
+               }
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                system_mem_needed = size;
        } else if (!(alloc_flag &
@@ -191,8 +213,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
             kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
            (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
             kfd_mem_limit.max_ttm_mem_limit) ||
-           (adev && adev->kfd.vram_used + vram_needed >
-            adev->gmc.real_vram_size - reserved_for_pt)) {
+           (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
+            vram_size - reserved_for_pt)) {
                ret = -ENOMEM;
                goto release;
        }
@@ -202,9 +224,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
         */
        WARN_ONCE(vram_needed && !adev,
                  "adev reference can't be null when vram is used");
-       if (adev) {
-               adev->kfd.vram_used += vram_needed;
-               adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+       if (adev && xcp_id >= 0) {
+               adev->kfd.vram_used[xcp_id] += vram_needed;
+               adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+                               vram_needed :
+                               ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
        }
        kfd_mem_limit.system_mem_used += system_mem_needed;
        kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@@ -215,7 +239,7 @@ release:
 }
 
 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag)
+               uint64_t size, u32 alloc_flag, int8_t xcp_id)
 {
        spin_lock(&kfd_mem_limit.mem_limit_lock);
 
@@ -225,9 +249,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                WARN_ONCE(!adev,
                          "adev reference can't be null when alloc mem flags vram is set");
+               if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+                       goto release;
+
                if (adev) {
-                       adev->kfd.vram_used -= size;
-                       adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+                       adev->kfd.vram_used[xcp_id] -= size;
+                       if (adev->gmc.is_app_apu) {
+                               adev->kfd.vram_used_aligned[xcp_id] -= size;
+                               kfd_mem_limit.system_mem_used -= size;
+                               kfd_mem_limit.ttm_mem_used -= size;
+                       } else {
+                               adev->kfd.vram_used_aligned[xcp_id] -=
+                                       ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+                       }
                }
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                kfd_mem_limit.system_mem_used -= size;
@@ -237,8 +271,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
                pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
                goto release;
        }
-       WARN_ONCE(adev && adev->kfd.vram_used < 0,
-                 "KFD VRAM memory accounting unbalanced");
+       WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+                 "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
        WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
                  "KFD TTM memory accounting unbalanced");
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
@@ -254,14 +288,16 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
        u32 alloc_flags = bo->kfd_bo->alloc_flags;
        u64 size = amdgpu_bo_size(bo);
 
-       amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+                                         bo->xcp_id);
 
        kfree(bo->kfd_bo);
 }
 
 /**
- * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
+ * create_dmamap_sg_bo() - Creates an amdgpu_bo object to reflect information
  * about USERPTR or DOORBELL or MMIO BO.
+ *
  * @adev: Device for which dmamap BO is being created
  * @mem: BO of peer device that is being DMA mapped. Provides parameters
  *      in building the dmamap BO
@@ -285,7 +321,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
 
        ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
                        AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
-                       ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+                       ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
 
        amdgpu_bo_unreserve(mem->bo);
 
@@ -527,6 +563,12 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
 {
        struct ttm_operation_ctx ctx = {.interruptible = true};
        struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+       int ret;
+
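+       /* Move the BO to the CPU domain first to invalidate any existing
+        * DMA mappings before the GTT mapping below is (re)created; the
+        * matching dmaunmap is a no-op (see kfd_mem_dmaunmap_dmabuf).
+        */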
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+       ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       if (ret)
+               return ret;
 
        amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
        return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -659,11 +701,10 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
 static void
 kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
 {
-       struct ttm_operation_ctx ctx = {.interruptible = true};
-       struct amdgpu_bo *bo = attachment->bo_va->base.bo;
-
-       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-       ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       /* This is a no-op. We don't want to trigger eviction fences when
+        * unmapping DMABufs. Therefore the invalidation (moving to system
+        * domain) is done in kfd_mem_dmamap_dmabuf.
+        */
 }
 
 /**
@@ -804,7 +845,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
         * if peer device has large BAR. In contrast, access over xGMI is
         * allowed for both small and large BAR configurations of peer device
         */
-       if ((adev != bo_adev) &&
+       if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
            ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -925,28 +966,20 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
                                struct amdkfd_process_info *process_info,
                                bool userptr)
 {
-       struct ttm_validate_buffer *entry = &mem->validate_list;
-       struct amdgpu_bo *bo = mem->bo;
-
-       INIT_LIST_HEAD(&entry->head);
-       entry->num_shared = 1;
-       entry->bo = &bo->tbo;
        mutex_lock(&process_info->lock);
        if (userptr)
-               list_add_tail(&entry->head, &process_info->userptr_valid_list);
+               list_add_tail(&mem->validate_list,
+                             &process_info->userptr_valid_list);
        else
-               list_add_tail(&entry->head, &process_info->kfd_bo_list);
+               list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
        mutex_unlock(&process_info->lock);
 }
 
 static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
                struct amdkfd_process_info *process_info)
 {
-       struct ttm_validate_buffer *bo_list_entry;
-
-       bo_list_entry = &mem->validate_list;
        mutex_lock(&process_info->lock);
-       list_del(&bo_list_entry->head);
+       list_del(&mem->validate_list);
        mutex_unlock(&process_info->lock);
 }
 
@@ -1033,13 +1066,12 @@ out:
  * object can track VM updates.
  */
 struct bo_vm_reservation_context {
-       struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
-       unsigned int n_vms;                 /* Number of VMs reserved       */
-       struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
-       struct ww_acquire_ctx ticket;       /* Reservation ticket           */
-       struct list_head list, duplicates;  /* BO lists                     */
-       struct amdgpu_sync *sync;           /* Pointer to sync object       */
-       bool reserved;                      /* Whether BOs are reserved     */
+       /* DRM execution context for the reservation */
+       struct drm_exec exec;
+       /* Number of VMs reserved */
+       unsigned int n_vms;
+       /* Pointer to sync object */
+       struct amdgpu_sync *sync;
 };
 
 enum bo_vm_match {
@@ -1063,35 +1095,26 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
        WARN_ON(!vm);
 
-       ctx->reserved = false;
        ctx->n_vms = 1;
        ctx->sync = &mem->sync;
-
-       INIT_LIST_HEAD(&ctx->list);
-       INIT_LIST_HEAD(&ctx->duplicates);
-
-       ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
-       if (!ctx->vm_pd)
-               return -ENOMEM;
-
-       ctx->kfd_bo.priority = 0;
-       ctx->kfd_bo.tv.bo = &bo->tbo;
-       ctx->kfd_bo.tv.num_shared = 1;
-       list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
-       amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
-
-       ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-                                    false, &ctx->duplicates);
-       if (ret) {
-               pr_err("Failed to reserve buffers in ttm.\n");
-               kfree(ctx->vm_pd);
-               ctx->vm_pd = NULL;
-               return ret;
+       drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
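+       /* On contention drm_exec unlocks everything taken so far and the
+        * loop body restarts from the top; the trailing 2 is the number of
+        * extra dma-resv fence slots to reserve on the page directory.
+        */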
+       drm_exec_until_all_locked(&ctx->exec) {
+               ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+               drm_exec_retry_on_contention(&ctx->exec);
+               if (unlikely(ret))
+                       goto error;
+
+               ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base);
+               drm_exec_retry_on_contention(&ctx->exec);
+               if (unlikely(ret))
+                       goto error;
        }
-
-       ctx->reserved = true;
        return 0;
+
+error:
+       pr_err("Failed to reserve buffers in ttm.\n");
+       drm_exec_fini(&ctx->exec);
+       return ret;
 }
 
 /**
@@ -1108,63 +1131,39 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
                                struct amdgpu_vm *vm, enum bo_vm_match map_type,
                                struct bo_vm_reservation_context *ctx)
 {
-       struct amdgpu_bo *bo = mem->bo;
        struct kfd_mem_attachment *entry;
-       unsigned int i;
+       struct amdgpu_bo *bo = mem->bo;
        int ret;
 
-       ctx->reserved = false;
-       ctx->n_vms = 0;
-       ctx->vm_pd = NULL;
        ctx->sync = &mem->sync;
+       drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+       drm_exec_until_all_locked(&ctx->exec) {
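+               /* Reset the count on every pass: after contention the loop
+                * body restarts from the top and would otherwise count the
+                * VMs twice.
+                */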
+               ctx->n_vms = 0;
+               list_for_each_entry(entry, &mem->attachments, list) {
+                       if ((vm && vm != entry->bo_va->base.vm) ||
+                               (entry->is_mapped != map_type
+                               && map_type != BO_VM_ALL))
+                               continue;
 
-       INIT_LIST_HEAD(&ctx->list);
-       INIT_LIST_HEAD(&ctx->duplicates);
-
-       list_for_each_entry(entry, &mem->attachments, list) {
-               if ((vm && vm != entry->bo_va->base.vm) ||
-                       (entry->is_mapped != map_type
-                       && map_type != BO_VM_ALL))
-                       continue;
-
-               ctx->n_vms++;
-       }
-
-       if (ctx->n_vms != 0) {
-               ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
-                                    GFP_KERNEL);
-               if (!ctx->vm_pd)
-                       return -ENOMEM;
-       }
-
-       ctx->kfd_bo.priority = 0;
-       ctx->kfd_bo.tv.bo = &bo->tbo;
-       ctx->kfd_bo.tv.num_shared = 1;
-       list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
-       i = 0;
-       list_for_each_entry(entry, &mem->attachments, list) {
-               if ((vm && vm != entry->bo_va->base.vm) ||
-                       (entry->is_mapped != map_type
-                       && map_type != BO_VM_ALL))
-                       continue;
-
-               amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
-                               &ctx->vm_pd[i]);
-               i++;
-       }
+                       ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
+                                               &ctx->exec, 2);
+                       drm_exec_retry_on_contention(&ctx->exec);
+                       if (unlikely(ret))
+                               goto error;
+                       ++ctx->n_vms;
+               }
 
-       ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-                                    false, &ctx->duplicates);
-       if (ret) {
-               pr_err("Failed to reserve buffers in ttm.\n");
-               kfree(ctx->vm_pd);
-               ctx->vm_pd = NULL;
-               return ret;
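+               /* Unlike drm_exec_lock_obj in reserve_bo_and_vm, prepare_obj
+                * also reserves a dma-resv fence slot on the BO.
+                */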
+               ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+               drm_exec_retry_on_contention(&ctx->exec);
+               if (unlikely(ret))
+                       goto error;
        }
-
-       ctx->reserved = true;
        return 0;
+
+error:
+       pr_err("Failed to reserve buffers in ttm.\n");
+       drm_exec_fini(&ctx->exec);
+       return ret;
 }
 
 /**
@@ -1185,15 +1184,8 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
        if (wait)
                ret = amdgpu_sync_wait(ctx->sync, intr);
 
-       if (ctx->reserved)
-               ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
-       kfree(ctx->vm_pd);
-
+       drm_exec_fini(&ctx->exec);
        ctx->sync = NULL;
-
-       ctx->reserved = false;
-       ctx->vm_pd = NULL;
-
        return ret;
 }
 
@@ -1599,23 +1591,42 @@ out_unlock:
        return ret;
 }
 
-size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+                                         uint8_t xcp_id)
 {
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
        ssize_t available;
+       uint64_t vram_available, system_mem_available, ttm_mem_available;
 
        spin_lock(&kfd_mem_limit.mem_limit_lock);
-       available = adev->gmc.real_vram_size
-               - adev->kfd.vram_used_aligned
+       vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+               - adev->kfd.vram_used_aligned[xcp_id]
                - atomic64_read(&adev->vram_pin_size)
                - reserved_for_pt;
+
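+       /* On APP APUs VRAM is backed by system memory, so the effective
+        * availability is the tightest of the system, TTM and partition
+        * limits.
+        */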
+       if (adev->gmc.is_app_apu) {
+               system_mem_available = no_system_mem_limit ?
+                                       kfd_mem_limit.max_system_mem_limit :
+                                       kfd_mem_limit.max_system_mem_limit -
+                                       kfd_mem_limit.system_mem_used;
+
+               ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+                               kfd_mem_limit.ttm_mem_used;
+
+               available = min3(system_mem_available, ttm_mem_available,
+                                vram_available);
+               available = ALIGN_DOWN(available, PAGE_SIZE);
+       } else {
+               available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+       }
+
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
 
        if (available < 0)
                available = 0;
 
-       return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
+       return available;
 }
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
@@ -1624,6 +1635,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                uint64_t *offset, uint32_t flags, bool criu_resume)
 {
        struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+       struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
        enum ttm_bo_type bo_type = ttm_bo_type_device;
        struct sg_table *sg = NULL;
        uint64_t user_addr = 0;
@@ -1631,6 +1643,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        struct drm_gem_object *gobj = NULL;
        u32 domain, alloc_domain;
        uint64_t aligned_size;
+       int8_t xcp_id = -1;
        u64 alloc_flags;
        int ret;
 
@@ -1639,9 +1652,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
         */
        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
-               alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
-               alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+
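+               /* APP APUs have no dedicated VRAM; VRAM requests are
+                * satisfied from the GTT domain instead.
+                */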
+               if (adev->gmc.is_app_apu) {
+                       domain = AMDGPU_GEM_DOMAIN_GTT;
+                       alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+                       alloc_flags = 0;
+               } else {
+                       alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+                       alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+               }
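+               /* An fpriv->xcp_id of ~0 apparently means the process is not
+                * bound to a specific partition; fall back to partition 0.
+                */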
+               xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id;
        } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
                domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
                alloc_flags = 0;
@@ -1693,17 +1714,19 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        amdgpu_sync_create(&(*mem)->sync);
 
-       ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
+       ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+                                             xcp_id);
        if (ret) {
                pr_debug("Insufficient memory\n");
                goto err_reserve_limit;
        }
 
-       pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
-                       va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));
+       pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
+                va, (*mem)->aql_queue ? size << 1 : size,
+                domain_string(alloc_domain), xcp_id);
 
        ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
-                                      bo_type, NULL, &gobj);
+                                      bo_type, NULL, &gobj, xcp_id + 1);
        if (ret) {
                pr_debug("Failed to create BO on domain %s. ret %d\n",
                         domain_string(alloc_domain), ret);
@@ -1728,6 +1751,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        (*mem)->domain = domain;
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
+
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
        if (user_addr) {
@@ -1759,7 +1783,7 @@ err_node_allow:
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
 err_bo_create:
-       amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
 err_reserve_limit:
        mutex_destroy(&(*mem)->lock);
        if (gobj)
@@ -1783,7 +1807,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        bool use_release_notifier = (mem->bo->kfd_bo == mem);
        struct kfd_mem_attachment *entry, *tmp;
        struct bo_vm_reservation_context ctx;
-       struct ttm_validate_buffer *bo_list_entry;
        unsigned int mapped_to_gpu_memory;
        int ret;
        bool is_imported = false;
@@ -1811,9 +1834,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        }
 
        /* Make sure restore workers don't access the BO any more */
-       bo_list_entry = &mem->validate_list;
        mutex_lock(&process_info->lock);
-       list_del(&bo_list_entry->head);
+       list_del(&mem->validate_list);
        mutex_unlock(&process_info->lock);
 
        /* Cleanup user pages and MMU notifiers */
@@ -1855,11 +1877,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        }
 
        /* Update the size of the BO being freed if it was allocated from
-        * VRAM and is not imported.
+        * VRAM and is not imported. For APP APUs, VRAM allocations are done
+        * in the GTT domain.
         */
        if (size) {
-               if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
-                   (!is_imported))
+               if (!is_imported &&
+                  (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
+                  (adev->gmc.is_app_apu &&
+                   mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
                        *size = bo_size;
                else
                        *size = 0;
@@ -2282,8 +2307,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
        (*mem)->dmabuf = dma_buf;
        (*mem)->bo = bo;
        (*mem)->va = va;
-       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
                AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
@@ -2376,14 +2402,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
        /* Move all invalidated BOs to the userptr_inval_list */
        list_for_each_entry_safe(mem, tmp_mem,
                                 &process_info->userptr_valid_list,
-                                validate_list.head)
+                                validate_list)
                if (mem->invalid)
-                       list_move_tail(&mem->validate_list.head,
+                       list_move_tail(&mem->validate_list,
                                       &process_info->userptr_inval_list);
 
        /* Go through userptr_inval_list and update any invalid user_pages */
        list_for_each_entry(mem, &process_info->userptr_inval_list,
-                           validate_list.head) {
+                           validate_list) {
                invalid = mem->invalid;
                if (!invalid)
                        /* BO hasn't been invalidated since the last
@@ -2445,7 +2471,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
                        ret = -EAGAIN;
                        goto unlock_out;
                }
-               mem->invalid = 0;
+               /* Set mem valid if it has an hmm range associated */
+               if (mem->range)
+                       mem->invalid = 0;
        }
 
 unlock_out:
@@ -2461,50 +2489,41 @@ unlock_out:
  */
 static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 {
-       struct amdgpu_bo_list_entry *pd_bo_list_entries;
-       struct list_head resv_list, duplicates;
-       struct ww_acquire_ctx ticket;
+       struct ttm_operation_ctx ctx = { false, false };
        struct amdgpu_sync sync;
+       struct drm_exec exec;
 
        struct amdgpu_vm *peer_vm;
        struct kgd_mem *mem, *tmp_mem;
        struct amdgpu_bo *bo;
-       struct ttm_operation_ctx ctx = { false, false };
-       int i, ret;
-
-       pd_bo_list_entries = kcalloc(process_info->n_vms,
-                                    sizeof(struct amdgpu_bo_list_entry),
-                                    GFP_KERNEL);
-       if (!pd_bo_list_entries) {
-               pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
-               ret = -ENOMEM;
-               goto out_no_mem;
-       }
-
-       INIT_LIST_HEAD(&resv_list);
-       INIT_LIST_HEAD(&duplicates);
+       int ret;
 
-       /* Get all the page directory BOs that need to be reserved */
-       i = 0;
-       list_for_each_entry(peer_vm, &process_info->vm_list_head,
-                           vm_list_node)
-               amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
-                                   &pd_bo_list_entries[i++]);
-       /* Add the userptr_inval_list entries to resv_list */
-       list_for_each_entry(mem, &process_info->userptr_inval_list,
-                           validate_list.head) {
-               list_add_tail(&mem->resv_list.head, &resv_list);
-               mem->resv_list.bo = mem->validate_list.bo;
-               mem->resv_list.num_shared = mem->validate_list.num_shared;
-       }
+       amdgpu_sync_create(&sync);
 
+       drm_exec_init(&exec, 0);
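+       /* No DRM_EXEC_INTERRUPTIBLE_WAIT: this runs from the userptr restore
+        * worker, not from a syscall, so waits need not be interruptible.
+        */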
        /* Reserve all BOs and page tables for validation */
-       ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
-       WARN(!list_empty(&duplicates), "Duplicates should be empty");
-       if (ret)
-               goto out_free;
+       drm_exec_until_all_locked(&exec) {
+               /* Reserve all the page directories */
+               list_for_each_entry(peer_vm, &process_info->vm_list_head,
+                                   vm_list_node) {
+                       ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unreserve_out;
+               }
 
-       amdgpu_sync_create(&sync);
+               /* Reserve the userptr_inval_list entries */
+               list_for_each_entry(mem, &process_info->userptr_inval_list,
+                                   validate_list) {
+                       struct drm_gem_object *gobj;
+
+                       gobj = &mem->bo->tbo.base;
+                       ret = drm_exec_prepare_obj(&exec, gobj, 1);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unreserve_out;
+               }
+       }
 
        ret = process_validate_vms(process_info);
        if (ret)
@@ -2513,7 +2532,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
        /* Validate BOs and update GPUVM page tables */
        list_for_each_entry_safe(mem, tmp_mem,
                                 &process_info->userptr_inval_list,
-                                validate_list.head) {
+                                validate_list) {
                struct kfd_mem_attachment *attachment;
 
                bo = mem->bo;
@@ -2555,12 +2574,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
        ret = process_update_pds(process_info, &sync);
 
 unreserve_out:
-       ttm_eu_backoff_reservation(&ticket, &resv_list);
+       drm_exec_fini(&exec);
        amdgpu_sync_wait(&sync, false);
        amdgpu_sync_free(&sync);
-out_free:
-       kfree(pd_bo_list_entries);
-out_no_mem:
 
        return ret;
 }
@@ -2576,9 +2592,16 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
 
        list_for_each_entry_safe(mem, tmp_mem,
                                 &process_info->userptr_inval_list,
-                                validate_list.head) {
-               bool valid = amdgpu_ttm_tt_get_user_pages_done(
-                               mem->bo->tbo.ttm, mem->range);
+                                validate_list) {
+               bool valid;
+
+               /* Keep mem without an hmm range on the userptr_inval_list */
+               if (!mem->range)
+                       continue;
+
+               /* Only check mem with hmm range associated */
+               valid = amdgpu_ttm_tt_get_user_pages_done(
+                                       mem->bo->tbo.ttm, mem->range);
 
                mem->range = NULL;
                if (!valid) {
@@ -2586,9 +2609,14 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
                        ret = -EAGAIN;
                        continue;
                }
-               WARN(mem->invalid, "Valid BO is marked invalid");
 
-               list_move_tail(&mem->validate_list.head,
+               if (mem->invalid) {
+                       WARN(1, "Valid BO is marked invalid");
+                       ret = -EAGAIN;
+                       continue;
+               }
+
+               list_move_tail(&mem->validate_list,
                               &process_info->userptr_valid_list);
        }
 
@@ -2698,50 +2726,44 @@ unlock_out:
  */
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 {
-       struct amdgpu_bo_list_entry *pd_bo_list;
        struct amdkfd_process_info *process_info = info;
        struct amdgpu_vm *peer_vm;
        struct kgd_mem *mem;
-       struct bo_vm_reservation_context ctx;
        struct amdgpu_amdkfd_fence *new_fence;
-       int ret = 0, i;
        struct list_head duplicate_save;
        struct amdgpu_sync sync_obj;
        unsigned long failed_size = 0;
        unsigned long total_size = 0;
+       struct drm_exec exec;
+       int ret;
 
        INIT_LIST_HEAD(&duplicate_save);
-       INIT_LIST_HEAD(&ctx.list);
-       INIT_LIST_HEAD(&ctx.duplicates);
-
-       pd_bo_list = kcalloc(process_info->n_vms,
-                            sizeof(struct amdgpu_bo_list_entry),
-                            GFP_KERNEL);
-       if (!pd_bo_list)
-               return -ENOMEM;
 
-       i = 0;
        mutex_lock(&process_info->lock);
-       list_for_each_entry(peer_vm, &process_info->vm_list_head,
-                       vm_list_node)
-               amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
-
-       /* Reserve all BOs and page tables/directory. Add all BOs from
-        * kfd_bo_list to ctx.list
-        */
-       list_for_each_entry(mem, &process_info->kfd_bo_list,
-                           validate_list.head) {
 
-               list_add_tail(&mem->resv_list.head, &ctx.list);
-               mem->resv_list.bo = mem->validate_list.bo;
-               mem->resv_list.num_shared = mem->validate_list.num_shared;
-       }
+       drm_exec_init(&exec, 0);
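+       /* One fence slot is reserved per BO below, presumably for the new
+        * eviction fence that gets attached after validation.
+        */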
+       drm_exec_until_all_locked(&exec) {
+               list_for_each_entry(peer_vm, &process_info->vm_list_head,
+                                   vm_list_node) {
+                       ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto ttm_reserve_fail;
+               }
 
-       ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
-                                    false, &duplicate_save);
-       if (ret) {
-               pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
-               goto ttm_reserve_fail;
+               /* Reserve all BOs and page tables/directories. Add all BOs
+                * from kfd_bo_list to the drm_exec locking context.
+                */
+               list_for_each_entry(mem, &process_info->kfd_bo_list,
+                                   validate_list) {
+                       struct drm_gem_object *gobj;
+
+                       gobj = &mem->bo->tbo.base;
+                       ret = drm_exec_prepare_obj(&exec, gobj, 1);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto ttm_reserve_fail;
+               }
        }
 
        amdgpu_sync_create(&sync_obj);
@@ -2759,7 +2781,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 
        /* Validate BOs and map them to GPUVM (update VM page tables). */
        list_for_each_entry(mem, &process_info->kfd_bo_list,
-                           validate_list.head) {
+                           validate_list) {
 
                struct amdgpu_bo *bo = mem->bo;
                uint32_t domain = mem->domain;
@@ -2832,8 +2854,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
        *ef = dma_fence_get(&new_fence->base);
 
        /* Attach new eviction fence to all BOs except pinned ones */
-       list_for_each_entry(mem, &process_info->kfd_bo_list,
-               validate_list.head) {
+       list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
                if (mem->bo->tbo.pin_count)
                        continue;
 
@@ -2852,11 +2873,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
        }
 
 validate_map_fail:
-       ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
        amdgpu_sync_free(&sync_obj);
 ttm_reserve_fail:
+       drm_exec_fini(&exec);
        mutex_unlock(&process_info->lock);
-       kfree(pd_bo_list);
        return ret;
 }