drm/amdkfd: add a new flag to manage where VRAM allocations go
author Alex Deucher <alexander.deucher@amd.com>
Thu, 30 Jan 2025 20:12:58 +0000 (15:12 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 13 Feb 2025 02:04:08 +0000 (21:04 -0500)
On big and small APUs we send KFD VRAM allocations to GTT
since the carve out is either non-existent or relatively
small.  However, if someone sets the carve out size to be
relatively large, we may still end up using GTT rather than
the VRAM that is actually available.

No change of logic with this patch, but it allows the
driver to determine which logic to use based on the
carve out size in the future.

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.h

index f86daad..0dbea25 100644 (file)
@@ -1193,6 +1193,11 @@ struct amdgpu_device {
        struct mutex                    enforce_isolation_mutex;
 
        struct amdgpu_init_level *init_lvl;
+
+       /* This flag is used to determine how VRAM allocations are handled for APUs
+        * in KFD: VRAM or GTT.
+        */
+       bool                            apu_prefer_gtt;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
index 2c1b38c..0312231 100644 (file)
@@ -459,7 +459,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
                else
                        mem_info->local_mem_size_private =
                                        KFD_XCP_MEMORY_SIZE(adev, xcp->id);
-       } else if (adev->flags & AMD_IS_APU) {
+       } else if (adev->apu_prefer_gtt) {
                mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
                mem_info->local_mem_size_private = 0;
        } else {
@@ -818,7 +818,7 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
                }
                do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
                return ALIGN_DOWN(tmp, PAGE_SIZE);
-       } else if (adev->flags & AMD_IS_APU) {
+       } else if (adev->apu_prefer_gtt) {
                return (ttm_tt_pages_limit() << PAGE_SHIFT);
        } else {
                return adev->gmc.real_vram_size;
index 2b4d384..60062c1 100644 (file)
@@ -197,7 +197,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                        return -EINVAL;
 
                vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
-               if (adev->flags & AMD_IS_APU) {
+               if (adev->apu_prefer_gtt) {
                        system_mem_needed = size;
                        ttm_mem_needed = size;
                }
@@ -234,7 +234,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
        if (adev && xcp_id >= 0) {
                adev->kfd.vram_used[xcp_id] += vram_needed;
                adev->kfd.vram_used_aligned[xcp_id] +=
-                               (adev->flags & AMD_IS_APU) ?
+                               adev->apu_prefer_gtt ?
                                vram_needed :
                                ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
        }
@@ -262,7 +262,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
 
                if (adev) {
                        adev->kfd.vram_used[xcp_id] -= size;
-                       if (adev->flags & AMD_IS_APU) {
+                       if (adev->apu_prefer_gtt) {
                                adev->kfd.vram_used_aligned[xcp_id] -= size;
                                kfd_mem_limit.system_mem_used -= size;
                                kfd_mem_limit.ttm_mem_used -= size;
@@ -890,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
         * if peer device has large BAR. In contrast, access over xGMI is
         * allowed for both small and large BAR configurations of peer device
         */
-       if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) &&
+       if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
            ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -1667,7 +1667,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
                - reserved_for_pt
                - reserved_for_ras;
 
-       if (adev->flags & AMD_IS_APU) {
+       if (adev->apu_prefer_gtt) {
                system_mem_available = no_system_mem_limit ?
                                        kfd_mem_limit.max_system_mem_limit :
                                        kfd_mem_limit.max_system_mem_limit -
@@ -1715,7 +1715,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 
-               if (adev->flags & AMD_IS_APU) {
+               if (adev->apu_prefer_gtt) {
                        domain = AMDGPU_GEM_DOMAIN_GTT;
                        alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
                        alloc_flags = 0;
@@ -1966,7 +1966,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        if (size) {
                if (!is_imported &&
                   (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
-                  ((adev->flags & AMD_IS_APU) &&
+                  (adev->apu_prefer_gtt &&
                    mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
                        *size = bo_size;
                else
@@ -2393,7 +2393,7 @@ static int import_obj_create(struct amdgpu_device *adev,
        (*mem)->bo = bo;
        (*mem)->va = va;
        (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
-                        !(adev->flags & AMD_IS_APU) ?
+                        !adev->apu_prefer_gtt ?
                         AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
 
        (*mem)->mapped_to_gpu_memory = 0;
index 3f61c99..e68324e 100644 (file)
@@ -1987,6 +1987,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
                 (unsigned int)(gtt_size / (1024 * 1024)));
 
+       if (adev->flags & AMD_IS_APU)
+               adev->apu_prefer_gtt = true;
+
        /* Initialize doorbell pool on PCI BAR */
        r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
        if (r) {
index d05d199..79251f2 100644 (file)
@@ -1027,7 +1027,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
        if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
                return -EINVAL;
 
-       if (adev->flags & AMD_IS_APU)
+       if (adev->apu_prefer_gtt)
                return 0;
 
        pgmap = &kfddev->pgmap;
index e32e191..db3034b 100644 (file)
@@ -2692,7 +2692,7 @@ svm_range_best_restore_location(struct svm_range *prange,
                return -1;
        }
 
-       if (node->adev->flags & AMD_IS_APU)
+       if (node->adev->apu_prefer_gtt)
                return 0;
 
        if (prange->preferred_loc == gpuid ||
@@ -3441,7 +3441,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
                goto out;
        }
 
-       if (bo_node->adev->flags & AMD_IS_APU) {
+       if (bo_node->adev->apu_prefer_gtt) {
                best_loc = 0;
                goto out;
        }
index bddd24f..6ea23c7 100644 (file)
@@ -202,7 +202,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
  * is initialized to not 0 when page migration register device memory.
  */
 #define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
-                                       ((adev)->flags & AMD_IS_APU))
+                                       ((adev)->apu_prefer_gtt))
 
 void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);