Merge tag 'amd-drm-next-6.3-2023-01-27' of https://gitlab.freedesktop.org/agd5f/linux...
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
index cfa411c..5bee3ff 100644 (file)
@@ -36,7 +36,9 @@
 #include <generated/utsrelease.h>
 #include <linux/pci-p2pdma.h>
 
+#include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
@@ -90,6 +92,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 #define AMDGPU_MAX_RETRY_LIMIT         2
 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
 
+static const struct drm_driver amdgpu_kms_driver;
+
 const char *amdgpu_asic_name[] = {
        "TAHITI",
        "PITCAIRN",
@@ -924,32 +928,33 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
+ * amdgpu_device_mem_scratch_init - allocate the memory scratch page
  *
  * @adev: amdgpu_device pointer
  *
  * Allocates a scratch page of VRAM for use by various things in the
  * driver.
  */
-static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
+static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
 {
-       return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
-                                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
-                                      &adev->vram_scratch.robj,
-                                      &adev->vram_scratch.gpu_addr,
-                                      (void **)&adev->vram_scratch.ptr);
+       return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+                                      AMDGPU_GEM_DOMAIN_VRAM |
+                                      AMDGPU_GEM_DOMAIN_GTT,
+                                      &adev->mem_scratch.robj,
+                                      &adev->mem_scratch.gpu_addr,
+                                      (void **)&adev->mem_scratch.ptr);
 }
 
 /**
- * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
+ * amdgpu_device_mem_scratch_fini - Free the memory scratch page
  *
  * @adev: amdgpu_device pointer
  *
  * Frees the VRAM scratch page.
  */
-static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
+static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
 {
-       amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
+       amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
 }
 
 /**
@@ -1981,17 +1986,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
        }
 
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
-       err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
+       err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
        if (err) {
                dev_err(adev->dev,
-                       "Failed to load gpu_info firmware \"%s\"\n",
-                       fw_name);
-               goto out;
-       }
-       err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
-       if (err) {
-               dev_err(adev->dev,
-                       "Failed to validate gpu_info firmware \"%s\"\n",
+                       "Failed to get gpu_info firmware \"%s\"\n",
                        fw_name);
                goto out;
        }
@@ -2078,6 +2076,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
        struct drm_device *dev = adev_to_drm(adev);
        struct pci_dev *parent;
        int i, r;
+       bool total;
 
        amdgpu_device_enable_virtual_display(adev);
 
@@ -2161,6 +2160,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
        if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
                adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
 
+       total = true;
        for (i = 0; i < adev->num_ip_blocks; i++) {
                if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
                        DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -2174,7 +2174,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
                                } else if (r) {
                                        DRM_ERROR("early_init of IP block <%s> failed %d\n",
                                                  adev->ip_blocks[i].version->funcs->name, r);
-                                       return r;
+                                       total = false;
                                } else {
                                        adev->ip_blocks[i].status.valid = true;
                                }
@@ -2205,6 +2205,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 
                }
        }
+       if (!total)
+               return -ENODEV;
 
        adev->cg_flags &= amdgpu_cg_mask;
        adev->pg_flags &= amdgpu_pg_mask;
@@ -2390,9 +2392,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                        if (amdgpu_sriov_vf(adev))
                                amdgpu_virt_exchange_data(adev);
 
-                       r = amdgpu_device_vram_scratch_init(adev);
+                       r = amdgpu_device_mem_scratch_init(adev);
                        if (r) {
-                               DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
+                               DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
                                goto init_failed;
                        }
                        r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
@@ -2410,8 +2412,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                        /* right after GMC hw init, we create CSA */
                        if (amdgpu_mcbp) {
                                r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
-                                                               AMDGPU_GEM_DOMAIN_VRAM,
-                                                               AMDGPU_CSA_SIZE);
+                                                              AMDGPU_GEM_DOMAIN_VRAM |
+                                                              AMDGPU_GEM_DOMAIN_GTT,
+                                                              AMDGPU_CSA_SIZE);
                                if (r) {
                                        DRM_ERROR("allocate CSA failed %d\n", r);
                                        goto init_failed;
@@ -2581,9 +2584,10 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
                i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
                if (!adev->ip_blocks[i].status.late_initialized)
                        continue;
-               /* skip CG for GFX on S0ix */
+               /* skip CG for GFX, SDMA on S0ix */
                if (adev->in_s0ix &&
-                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+                   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
                        continue;
                /* skip CG for VCE/UVD, it's handled specially */
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -2617,9 +2621,10 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
                i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
                if (!adev->ip_blocks[i].status.late_initialized)
                        continue;
-               /* skip PG for GFX on S0ix */
+               /* skip PG for GFX, SDMA on S0ix */
                if (adev->in_s0ix &&
-                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+                   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
                        continue;
                /* skip CG for VCE/UVD, it's handled specially */
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -2871,7 +2876,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
                        amdgpu_ucode_free_bo(adev);
                        amdgpu_free_static_csa(&adev->virt.csa_obj);
                        amdgpu_device_wb_fini(adev);
-                       amdgpu_device_vram_scratch_fini(adev);
+                       amdgpu_device_mem_scratch_fini(adev);
                        amdgpu_ib_pool_fini(adev);
                }
 
@@ -3016,14 +3021,33 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
                        continue;
                }
 
-               /* skip suspend of gfx and psp for S0ix
+               /* skip suspend of gfx/mes and psp for S0ix
                 * gfx is in gfxoff state, so on resume it will exit gfxoff just
                 * like at runtime. PSP is also part of the always on hardware
                 * so no need to suspend it.
                 */
                if (adev->in_s0ix &&
                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
-                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
+                       continue;
+
+               /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+               if (adev->in_s0ix &&
+                   (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
+                   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+                       continue;
+
+               /* Once swPSP provides the IMU and RLC FW binaries to TOS during cold-boot,
+                * these are in TMR, hence are expected to be reused by PSP-TOS to reload
+                * from this location and RLC Autoload automatically also gets loaded
+                * from here based on PMFW -> PSP message during re-init sequence.
+                * Therefore, the psp suspend & resume should be skipped to avoid destroying
+                * the TMR and reloading FWs again for IMU enabled APU ASICs.
+                */
+               if (amdgpu_in_reset(adev) &&
+                   (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
+                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
                        continue;
 
                /* XXX handle errors */
@@ -3226,15 +3250,6 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
                        return r;
                }
                adev->ip_blocks[i].status.hw = true;
-
-               if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
-                       /* disable gfxoff for IP resume. The gfxoff will be re-enabled in
-                        * amdgpu_device_resume() after IP resume.
-                        */
-                       amdgpu_gfx_off_ctrl(adev, false);
-                       DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
-               }
-
        }
 
        return 0;
@@ -3686,6 +3701,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        if (r)
                return r;
 
+       /* Get rid of things like offb */
+       r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
+       if (r)
+               return r;
+
        /* Enable TMZ based on IP_VERSION */
        amdgpu_gmc_tmz_set(adev);
 
@@ -3988,10 +4008,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
        }
        amdgpu_fence_driver_hw_fini(adev);
 
-       if (adev->mman.initialized) {
-               flush_delayed_work(&adev->mman.bdev.wq);
-               ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
-       }
+       if (adev->mman.initialized)
+               drain_workqueue(adev->mman.bdev.wq);
 
        if (adev->pm_sysfs_en)
                amdgpu_pm_sysfs_fini(adev);
@@ -4023,8 +4041,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 
        amdgpu_fence_driver_sw_fini(adev);
        amdgpu_device_ip_fini(adev);
-       release_firmware(adev->firmware.gpu_info_fw);
-       adev->firmware.gpu_info_fw = NULL;
+       amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
        adev->accel_working = false;
        dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
 
@@ -4112,6 +4129,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
        adev->in_suspend = true;
 
+       /* Evict the majority of BOs before grabbing the full access */
+       r = amdgpu_device_evict_resources(adev);
+       if (r)
+               return r;
+
        if (amdgpu_sriov_vf(adev)) {
                amdgpu_virt_fini_data_exchange(adev);
                r = amdgpu_virt_request_full_gpu(adev, false);
@@ -4217,13 +4239,6 @@ exit:
        /* Make sure IB tests flushed */
        flush_delayed_work(&adev->delayed_init_work);
 
-       if (adev->in_s0ix) {
-               /* re-enable gfxoff after IP resume. This re-enables gfxoff after
-                * it was disabled for IP resume in amdgpu_device_ip_resume_phase2().
-                */
-               amdgpu_gfx_off_ctrl(adev, true);
-               DRM_DEBUG("will enable gfxoff for the mission mode\n");
-       }
        if (fbcon)
                drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
 
@@ -4604,11 +4619,6 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
        if (!amdgpu_ras_is_poison_mode_supported(adev))
                return true;
 
-       if (!amdgpu_device_ip_check_soft_reset(adev)) {
-               dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");
-               return false;
-       }
-
        if (amdgpu_sriov_vf(adev))
                return true;
 
@@ -4733,7 +4743,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
                if (!need_full_reset)
                        need_full_reset = amdgpu_device_ip_need_full_reset(adev);
 
-               if (!need_full_reset && amdgpu_gpu_recovery) {
+               if (!need_full_reset && amdgpu_gpu_recovery &&
+                   amdgpu_device_ip_check_soft_reset(adev)) {
                        amdgpu_device_ip_pre_soft_reset(adev);
                        r = amdgpu_device_ip_soft_reset(adev);
                        amdgpu_device_ip_post_soft_reset(adev);
@@ -5859,8 +5870,8 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
 int amdgpu_in_reset(struct amdgpu_device *adev)
 {
        return atomic_read(&adev->reset_domain->in_gpu_reset);
-       }
-       
+}
+
 /**
  * amdgpu_device_halt() - bring hardware to some kind of halt state
  *