drm/amdgpu: Allocate GART table in RAM for AMD APU
author Felix Kuehling <Felix.Kuehling@amd.com>
Tue, 29 Nov 2022 17:45:26 +0000 (12:45 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 13:51:06 +0000 (09:51 -0400)
Some AMD APUs may not have dedicated VRAM. On such platforms the GART
table should be allocated in system memory. When the real VRAM size is
zero, place the GART table in system memory and create an SG BO to make
it GPU accessible.

v2: fix includes

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
(rajneesh: removed set_memory_wc workaround)
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 6b12f4a..a070adf 100644 (file)
@@ -35,6 +35,7 @@
 #endif
 #include "amdgpu.h"
 #include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
 
 /*
  * GART
@@ -102,6 +103,142 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
        adev->dummy_page_addr = 0;
 }
 
+/**
+ * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate system memory for GART page table for ASICs that don't have
+ * dedicated VRAM. The pages are DMA mapped, described by a single-entry
+ * SG table and wrapped in an SG BO which is then pinned to GTT.
+ * Returns 0 for success (including when a GART BO already exists), error
+ * for failure. On failure every resource acquired here is released again
+ * and adev->gart.bo / adev->gart.ptr are not left pointing at freed memory.
+ */
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
+{
+       unsigned int order = get_order(adev->gart.table_size);
+       gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
+       struct amdgpu_bo *bo = NULL;
+       struct sg_table *sg = NULL;
+       struct amdgpu_bo_param bp;
+       dma_addr_t dma_addr;
+       struct page *p;
+       int ret;
+
+       if (adev->gart.bo != NULL)
+               return 0;
+
+       p = alloc_pages(gfp_flags, order);
+       if (!p)
+               return -ENOMEM;
+
+       /* If the hardware does not support UTCL2 snooping of the CPU caches
+        * then set_memory_wc() could be used as a workaround to mark the pages
+        * as write combine memory.
+        */
+       dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
+                               DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
+               dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
+               __free_pages(p, order);
+               return -EFAULT;
+       }
+
+       /* dma_addr_t width depends on the build; %pad takes it by reference */
+       dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
+       /* Create SG table */
+       sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+       if (!sg) {
+               ret = -ENOMEM;
+               goto error_unmap;
+       }
+       ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+       if (ret)
+               goto error_free_sg;
+
+       /* The page was mapped above, so fill in the DMA side by hand */
+       sg_dma_address(sg->sgl) = dma_addr;
+       sg->sgl->length = adev->gart.table_size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+       sg->sgl->dma_length = adev->gart.table_size;
+#endif
+       /* Create SG BO */
+       memset(&bp, 0, sizeof(bp));
+       bp.size = adev->gart.table_size;
+       bp.byte_align = PAGE_SIZE;
+       bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+       bp.type = ttm_bo_type_sg;
+       bp.resv = NULL;
+       bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+       bp.flags = 0;
+       ret = amdgpu_bo_create(adev, &bp, &bo);
+       if (ret)
+               goto error_free_table;
+
+       bo->tbo.sg = sg;
+       bo->tbo.ttm->sg = sg;
+       bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+       bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+       ret = amdgpu_bo_reserve(bo, true);
+       if (ret) {
+               dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
+               goto error_unref;
+       }
+
+       ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+       WARN(ret, "Pinning the GART table failed");
+       if (ret)
+               goto error_unreserve;
+
+       /* Publish the table before amdgpu_ttm_alloc_gart(), as the original
+        * code does; the fields are cleared again if that call fails.
+        */
+       adev->gart.bo = bo;
+       adev->gart.ptr = page_to_virt(p);
+       /* Make GART table accessible in VMID0 */
+       ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
+       if (ret)
+               goto error_unpin;
+
+       amdgpu_bo_unreserve(bo);
+
+       return 0;
+
+error_unpin:
+       /* Unwind locally: the BO is still reserved here, so the generic
+        * amdgpu_gart_table_ram_free() (which reserves it itself and drops
+        * the last reference) must not be used on this path.
+        */
+       adev->gart.ptr = NULL;
+       adev->gart.bo = NULL;
+       amdgpu_bo_unpin(bo);
+error_unreserve:
+       amdgpu_bo_unreserve(bo);
+error_unref:
+       amdgpu_bo_unref(&bo);
+error_free_table:
+       sg_free_table(sg);
+error_free_sg:
+       kfree(sg);
+error_unmap:
+       dma_unmap_page(&adev->pdev->dev, dma_addr, adev->gart.table_size,
+                      DMA_BIDIRECTIONAL);
+       __free_pages(p, order);
+       return ret;
+}
+
+/**
+ * amdgpu_gart_table_ram_free - free gart page table system ram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the system memory used for the GART page table on ASICs that don't
+ * have dedicated VRAM: unpin and release the SG BO, undo the DMA mapping,
+ * free the SG table and give the pages back. Does nothing when no GART BO
+ * is present.
+ */
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
+{
+       unsigned int order = get_order(adev->gart.table_size);
+       struct sg_table *sg;
+       struct page *p;
+       int ret;
+
+       /* Nothing to release if the table was never (successfully) allocated */
+       if (!adev->gart.bo)
+               return;
+
+       /* Fetch the SG table first; the BO reference is dropped below */
+       sg = adev->gart.bo->tbo.sg;
+
+       ret = amdgpu_bo_reserve(adev->gart.bo, false);
+       if (!ret) {
+               amdgpu_bo_unpin(adev->gart.bo);
+               amdgpu_bo_unreserve(adev->gart.bo);
+       }
+       amdgpu_bo_unref(&adev->gart.bo);
+
+       /* Undo the dma_map_page() done at allocation time; the bus address
+        * was stored in the single SG entry.
+        */
+       dma_unmap_page(&adev->pdev->dev, sg_dma_address(sg->sgl),
+                      adev->gart.table_size, DMA_BIDIRECTIONAL);
+       sg_free_table(sg);
+       kfree(sg);
+       p = virt_to_page(adev->gart.ptr);
+       __free_pages(p, order);
+
+       adev->gart.ptr = NULL;
+}
+
 /**
  * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
  *
index 8fea3e0..8283d68 100644 (file)
@@ -51,6 +51,8 @@ struct amdgpu_gart {
        uint64_t                        gart_pte_flags;
 };
 
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
 int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
 int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
index 444441c..aca8489 100644 (file)
@@ -1688,12 +1688,18 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
        adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
                                 AMDGPU_PTE_EXECUTABLE;
 
-       r = amdgpu_gart_table_vram_alloc(adev);
-       if (r)
-               return r;
+       if (!adev->gmc.real_vram_size) {
+               dev_info(adev->dev, "Put GART in system memory for APU\n");
+               r = amdgpu_gart_table_ram_alloc(adev);
+               if (r)
+                       dev_err(adev->dev, "Failed to allocate GART in system memory\n");
+       } else {
+               r = amdgpu_gart_table_vram_alloc(adev);
+               if (r)
+                       return r;
 
-       if (adev->gmc.xgmi.connected_to_cpu) {
-               r = amdgpu_gmc_pdb0_alloc(adev);
+               if (adev->gmc.xgmi.connected_to_cpu)
+                       r = amdgpu_gmc_pdb0_alloc(adev);
        }
 
        return r;
@@ -1902,7 +1908,12 @@ static int gmc_v9_0_sw_fini(void *handle)
        amdgpu_gmc_ras_fini(adev);
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
-       amdgpu_gart_table_vram_free(adev);
+       if (!adev->gmc.real_vram_size) {
+               dev_info(adev->dev, "Put GART in system memory for APU free\n");
+               amdgpu_gart_table_ram_free(adev);
+       } else {
+               amdgpu_gart_table_vram_free(adev);
+       }
        amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
        amdgpu_bo_fini(adev);