Merge tag 'kbuild-v6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy...
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85..d8a4fdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -51,8 +51,6 @@
 #include "athub_v2_0.h"
 #include "athub_v2_1.h"
 
-#include "amdgpu_reset.h"
-
 static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
                                         struct amdgpu_irq_src *src,
                                         unsigned int type,
@@ -145,11 +143,15 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
                 * the new fast GRBM interface.
                 */
                if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
-                   (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
+                   (amdgpu_ip_version(adev, GC_HWIP, 0) <
+                    IP_VERSION(10, 3, 0)))
                        RREG32(hub->vm_l2_pro_fault_status);
 
                status = RREG32(hub->vm_l2_pro_fault_status);
                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+                                            entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
        }
 
        if (!printk_ratelimit())
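
On GC IP versions before 10.3.0 the new fast GRBM interface can return a stale
value on the first read of the protection fault status register, which is why
the hunk above keeps a throwaway RREG32() before the read whose result is
actually used. A minimal sketch of that read-twice pattern, where read_reg()
and FAULT_STATUS are hypothetical stand-ins for the real MMIO accessor and
register offset:

    /* Illustrative only: read_reg() and FAULT_STATUS are stand-ins for
     * the amdgpu MMIO accessors and hub register offsets.
     */
    static uint32_t read_fault_status(bool fast_grbm_quirk)
    {
            if (fast_grbm_quirk)
                    (void)read_reg(FAULT_STATUS); /* dummy read, value discarded */
            return read_reg(FAULT_STATUS);        /* this value can be trusted */
    }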
@@ -230,20 +232,47 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
  * by the amdgpu vm/hsa code.
  */
 
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
-                                  unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+                                       uint32_t vmhub, uint32_t flush_type)
 {
        bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
        struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
        u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-       u32 tmp;
        /* Use register 17 for GART */
        const unsigned int eng = 17;
-       unsigned int i;
        unsigned char hub_ip = 0;
+       u32 sem, req, ack;
+       unsigned int i;
+       u32 tmp;
+
+       sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+       /* flush hdp cache */
+       adev->hdp.funcs->flush_hdp(adev, NULL);
+
+       /* For SRIOV at run time, the driver shouldn't access the register
+        * through MMIO. Use KIQ to do the VM invalidation instead.
+        */
+       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+                               1 << vmid);
+               return;
+       }
 
-       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
-                  GC_HWIP : MMHUB_HWIP;
+       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
 
        spin_lock(&adev->gmc.invalidate_lock);
        /*
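
The rewrite above hoists the semaphore/request/ack register offsets out of the
individual accessor calls: each invalidation engine's registers sit at a fixed
stride (eng_distance) from the engine-0 base. A simplified model of that
offset math, with an illustrative struct mirroring only the struct
amdgpu_vmhub fields used here:

    /* Simplified model; this struct is illustrative, not the kernel's. */
    struct vmhub_regs {
            uint32_t vm_inv_eng0_sem;
            uint32_t vm_inv_eng0_req;
            uint32_t vm_inv_eng0_ack;
            uint32_t eng_distance;  /* register stride between engines */
    };

    static void inv_eng_offsets(const struct vmhub_regs *hub, unsigned int eng,
                                uint32_t *sem, uint32_t *req, uint32_t *ack)
    {
            *sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
            *req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
            *ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
    }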
@@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
        if (use_semaphore) {
                for (i = 0; i < adev->usec_timeout; i++) {
                        /* a read return value of 1 means semaphore acquire */
-                       tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-                                        hub->eng_distance * eng, hub_ip);
-
+                       tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
                        if (tmp & 0x1)
                                break;
                        udelay(1);
@@ -269,24 +296,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
                        DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
        }
 
-       WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
-                         hub->eng_distance * eng,
-                         inv_req, hub_ip);
+       WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
 
        /*
         * Issue a dummy read to wait for the ACK register to be cleared
         * to avoid a false ACK due to the new fast GRBM interface.
         */
        if ((vmhub == AMDGPU_GFXHUB(0)) &&
-           (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
-               RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
-                                 hub->eng_distance * eng, hub_ip);
+           (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
+               RREG32_RLC_NO_KIQ(req, hub_ip);
 
        /* Wait for ACK with a delay. */
        for (i = 0; i < adev->usec_timeout; i++) {
-               tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
-                                 hub->eng_distance * eng, hub_ip);
-
+               tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
                tmp &= 1 << vmid;
                if (tmp)
                        break;
@@ -296,109 +318,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 
        /* TODO: Semaphore-based invalidation for GFXHUB still needs further debugging. */
        if (use_semaphore)
-               /*
-                * add semaphore release after invalidation,
-                * write with 0 means semaphore release
-                */
-               WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-                                 hub->eng_distance * eng, 0, hub_ip);
+               WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
 
        spin_unlock(&adev->gmc.invalidate_lock);
 
-       if (i < adev->usec_timeout)
-               return;
-
-       DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: vmhub type
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
-                                       uint32_t vmhub, uint32_t flush_type)
-{
-       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-       struct dma_fence *fence;
-       struct amdgpu_job *job;
-
-       int r;
-
-       /* flush hdp cache */
-       adev->hdp.funcs->flush_hdp(adev, NULL);
-
-       /* For SRIOV run time, driver shouldn't access the register through MMIO
-        * Directly use kiq to do the vm invalidation instead
-        */
-       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
-           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-           down_read_trylock(&adev->reset_domain->sem)) {
-               struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-               const unsigned int eng = 17;
-               u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-               u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-               u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
-               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-                               1 << vmid);
-
-               up_read(&adev->reset_domain->sem);
-               return;
-       }
-
-       mutex_lock(&adev->mman.gtt_window_lock);
-
-       if (vmhub == AMDGPU_MMHUB0(0)) {
-               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
-               mutex_unlock(&adev->mman.gtt_window_lock);
-               return;
-       }
-
-       BUG_ON(vmhub != AMDGPU_GFXHUB(0));
-
-       if (!adev->mman.buffer_funcs_enabled ||
-           !adev->ib_pool_ready ||
-           amdgpu_in_reset(adev) ||
-           ring->sched.ready == false) {
-               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
-               mutex_unlock(&adev->mman.gtt_window_lock);
-               return;
-       }
-
-       /* The SDMA on Navi has a bug which can theoretically result in memory
-        * corruption if an invalidation happens at the same time as an VA
-        * translation. Avoid this by doing the invalidation from the SDMA
-        * itself.
-        */
-       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED,
-                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
-                                    &job);
-       if (r)
-               goto error_alloc;
-
-       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-       job->vm_needs_flush = true;
-       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
-       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-       fence = amdgpu_job_submit(job);
-
-       mutex_unlock(&adev->mman.gtt_window_lock);
-
-       dma_fence_wait(fence, false);
-       dma_fence_put(fence);
-
-       return;
-
-error_alloc:
-       mutex_unlock(&adev->mman.gtt_window_lock);
-       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+       if (i >= adev->usec_timeout)
+               dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
+                       vmhub);
 }
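
With the helper folded in, the MMIO path of gmc_v10_0_flush_gpu_tlb() now
reads as one linear sequence: optionally acquire the invalidation semaphore,
write the request, issue the dummy read on pre-10.3.0 GC parts, poll the
per-VMID ack bit, release the semaphore, and report a timeout if the poll loop
expired. A condensed, illustrative rendering (wr(), rd(), and wait_for_bit()
stand in for the RLC_NO_KIQ accessors and the udelay loops):

    /* Condensed sketch of the flush sequence above; not kernel code. */
    if (use_semaphore)
            wait_for_bit(sem, 0x1);          /* read of 1 == semaphore acquired */
    wr(req, inv_req);
    if (gfxhub && gc_ip_before_10_3_0)
            (void)rd(req);                   /* avoid false ACK on fast GRBM */
    timed_out = !wait_for_bit(ack, 1u << vmid);
    if (use_semaphore)
            wr(sem, 0);                      /* writing 0 releases the semaphore */
    if (timed_out)
            report_timeout(vmhub);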
 
 /**
@@ -412,62 +338,31 @@ error_alloc:
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-                                       uint16_t pasid, uint32_t flush_type,
-                                       bool all_hub, uint32_t inst)
+static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+                                         uint16_t pasid, uint32_t flush_type,
+                                         bool all_hub, uint32_t inst)
 {
+       uint16_t queried;
        int vmid, i;
-       signed long r;
-       uint32_t seq;
-       uint16_t queried_pasid;
-       bool ret;
-       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
-       struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
-
-       if (amdgpu_emu_mode == 0 && ring->sched.ready) {
-               spin_lock(&adev->gfx.kiq[0].ring_lock);
-               /* 2 dwords flush + 8 dwords fence */
-               amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
-               kiq->pmf->kiq_invalidate_tlbs(ring,
-                                       pasid, flush_type, all_hub);
-               r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-               if (r) {
-                       amdgpu_ring_undo(ring);
-                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
-                       return -ETIME;
-               }
-
-               amdgpu_ring_commit(ring);
-               spin_unlock(&adev->gfx.kiq[0].ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
-               if (r < 1) {
-                       dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-                       return -ETIME;
-               }
-
-               return 0;
-       }
 
        for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
-
-               ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
-                               &queried_pasid);
-               if (ret && queried_pasid == pasid) {
-                       if (all_hub) {
-                               for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-                                       gmc_v10_0_flush_gpu_tlb(adev, vmid,
-                                                       i, flush_type);
-                       } else {
-                               gmc_v10_0_flush_gpu_tlb(adev, vmid,
-                                               AMDGPU_GFXHUB(0), flush_type);
-                       }
-                       if (!adev->enable_mes)
-                               break;
+               bool valid;
+
+               valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+                                                                 &queried);
+               if (!valid || queried != pasid)
+                       continue;
+
+               if (all_hub) {
+                       for_each_set_bit(i, adev->vmhubs_mask,
+                                        AMDGPU_MAX_VMHUBS)
+                               gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
+                                                       flush_type);
+               } else {
+                       gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+                                               flush_type);
                }
        }
-
-       return 0;
 }
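
With the open-coded KIQ fast path gone (see the hw_init change below), the
PASID variant reduces to a register scan: query the ATC VMID-to-PASID mapping
for each VMID and flush whichever entries are bound to the target PASID. The
shape of that loop, with lookup_pasid() and flush_tlb() as stand-ins for
gmc_v10_0_get_atc_vmid_pasid_mapping_info() and gmc_v10_0_flush_gpu_tlb():

    /* Illustrative loop shape; the helpers are stand-ins. */
    for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
            uint16_t queried;

            if (!lookup_pasid(vmid, &queried) || queried != pasid)
                    continue;
            /* one hub, or every populated hub when all_hub is set */
            flush_tlb(vmid, all_hub);
    }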
 
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -634,6 +529,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
        }
 
        if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                              AMDGPU_GEM_CREATE_EXT_COHERENT |
                               AMDGPU_GEM_CREATE_UNCACHED))
                *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
                         AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
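
The new AMDGPU_GEM_CREATE_EXT_COHERENT flag joins the set that forces the PTE
memory type to uncached; the update itself is the usual clear-field-then-or
pattern on the 64-bit PTE flags. A self-contained sketch, with the field
position and UC encoding chosen to match what the NV10 macros suggest but
labeled as assumptions:

    #include <stdint.h>

    /* Assumed layout: MTYPE in PTE bits 49:48 with UC encoded as 3, as the
     * AMDGPU_PTE_MTYPE_NV10 macros suggest; treat both constants as
     * illustrative.
     */
    #define MTYPE_NV10_MASK (3ULL << 48)
    #define MTYPE_NV10_UC   (3ULL << 48)

    static inline uint64_t pte_force_uncached(uint64_t flags)
    {
            return (flags & ~MTYPE_NV10_MASK) | MTYPE_NV10_UC;
    }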
@@ -680,7 +576,7 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
 
 static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
 {
-       switch (adev->ip_versions[UMC_HWIP][0]) {
+       switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
        case IP_VERSION(8, 7, 0):
                adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
                adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
@@ -697,7 +593,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
 
 static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
 {
-       switch (adev->ip_versions[MMHUB_HWIP][0]) {
+       switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
        case IP_VERSION(2, 3, 0):
        case IP_VERSION(2, 4, 0):
        case IP_VERSION(2, 4, 1):
@@ -711,7 +607,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
 
 static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
 {
-       switch (adev->ip_versions[GC_HWIP][0]) {
+       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
        case IP_VERSION(10, 3, 0):
        case IP_VERSION(10, 3, 2):
        case IP_VERSION(10, 3, 1):
@@ -777,8 +673,9 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
        base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
 
        amdgpu_gmc_vram_location(adev, &adev->gmc, base);
-       amdgpu_gmc_gart_location(adev, mc);
-       amdgpu_gmc_agp_location(adev, mc);
+       amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+       if (!amdgpu_sriov_vf(adev))
+               amdgpu_gmc_agp_location(adev, mc);
 
        /* base offset of vram pages */
        adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -825,7 +722,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
 
        /* set the gart size */
        if (amdgpu_gart_size == -1) {
-               switch (adev->ip_versions[GC_HWIP][0]) {
+               switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
                default:
                        adev->gmc.gart_size = 512ULL << 20;
                        break;
@@ -892,7 +789,7 @@ static int gmc_v10_0_sw_init(void *handle)
                adev->gmc.vram_vendor = vram_vendor;
        }
 
-       switch (adev->ip_versions[GC_HWIP][0]) {
+       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
        case IP_VERSION(10, 3, 0):
                adev->gmc.mall_size = 128 * 1024 * 1024;
                break;
@@ -910,7 +807,7 @@ static int gmc_v10_0_sw_init(void *handle)
                break;
        }
 
-       switch (adev->ip_versions[GC_HWIP][0]) {
+       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
        case IP_VERSION(10, 1, 10):
        case IP_VERSION(10, 1, 1):
        case IP_VERSION(10, 1, 2):
@@ -1084,8 +981,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
 
 static int gmc_v10_0_hw_init(void *handle)
 {
-       int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
 
        /* The sequence of these two function calls matters.*/
        gmc_v10_0_init_golden_registers(adev);
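
Setting gmc.flush_pasid_uses_kiq here replaces the open-coded KIQ submission
deleted from gmc_v10_0_flush_gpu_tlb_pasid() above: deciding whether to
invalidate via KIQ or to fall back to the per-VMID register path now lives in
common GMC code. Assuming the common helper keys off this flag, the dispatch
looks roughly like:

    /* Sketch of the assumed dispatch in the common GMC helper; names other
     * than flush_pasid_uses_kiq and flush_gpu_tlb_pasid are illustrative.
     */
    if (adev->gmc.flush_pasid_uses_kiq && kiq_ring_ready(adev))
            kiq_invalidate_tlbs(adev, pasid, flush_type, all_hub);
    else
            adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, flush_type,
                                                     all_hub, inst);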
@@ -1195,7 +1094,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
         * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
         * seen any issue on the DF 3.0.2 series platform.
         */
-       if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+       if (adev->in_s0ix &&
+           amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) {
                dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
                return 0;
        }
@@ -1204,7 +1104,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
        if (r)
                return r;
 
-       if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+       if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
                return athub_v2_1_set_clockgating(adev, state);
        else
                return athub_v2_0_set_clockgating(adev, state);
@@ -1214,13 +1114,13 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
-           adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) ||
+           amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4))
                return;
 
        adev->mmhub.funcs->get_clockgating(adev, flags);
 
-       if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+       if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
                athub_v2_1_get_clockgating(adev, flags);
        else
                athub_v2_0_get_clockgating(adev, flags);