drm/amdgpu: implement more ib pools (v2)

[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

index fa24597..6f84b85 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -888,7 +888,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
         gpu_addr = adev->wb.gpu_addr + (index * 4);
         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
         memset(&ib, 0, sizeof(ib));
-       r = amdgpu_ib_get(adev, NULL, 16, &ib);
+       r = amdgpu_ib_get(adev, NULL, 16,
+                                       AMDGPU_IB_POOL_DIRECT, &ib);
         if (r)
                 goto err1;
  
@@ -1318,6 +1319,10 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
                         return r;
         }
  
+       /* init spm vmid with 0xf */
+       if (adev->gfx.rlc.funcs->update_spm_vmid)
+               adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+
         return 0;
  }
  
@@ -1546,7 +1551,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
  
         /* allocate an indirect buffer to put the commands in */
         memset(&ib, 0, sizeof(ib));
-       r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+       r = amdgpu_ib_get(adev, NULL, total_size,
+                                       AMDGPU_IB_POOL_DIRECT, &ib);
         if (r) {
                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                 return r;
@@ -1820,6 +1826,11 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
  
+       adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
+                               MC_ARB_RAMCFG, NOOFBANK);
+       adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
+                               MC_ARB_RAMCFG, NOOFRANKS);
+
         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
         adev->gfx.config.mem_max_burst_length_bytes = 256;
         if (adev->flags & AMD_IS_APU) {
@@ -4421,6 +4432,22 @@ static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
         return r;
  }
  
+static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd) /* set the MQD priority fields and ring->has_high_prio for a compute ring */
+{
+       struct amdgpu_device *adev = ring->adev;
+
+       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { /* rings of any other type are left untouched */
+               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
+                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+                       ring->has_high_prio = true;
+                       mqd->cp_hqd_queue_priority =
+                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+               } else {
+                       ring->has_high_prio = false; /* NOTE(review): neither MQD priority field is written on this path - confirm they are initialised elsewhere */
+               }
+       }
+}
+
  static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
  {
         struct amdgpu_device *adev = ring->adev;
@@ -4544,9 +4571,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
         /* defaults */
         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
-       mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
-       mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
-       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
@@ -4558,6 +4582,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
  
+       /* set static priority for a queue/ring */
+       gfx_v8_0_mqd_set_priority(ring, mqd);
+       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+
         /* map_queues packet doesn't need activate the queue,
          * so only kiq need set this field.
          */
@@ -5589,6 +5617,18 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
         }
  }
  
+static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) /* read-modify-write of mmRLC_SPM_VMID that replaces only the RLC_SPM_VMID field with vmid */
+{
+       u32 data;
+
+       data = RREG32(mmRLC_SPM_VMID);
+
+       data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; /* clear the old field, keep the other bits of the register */
+       data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; /* NOTE(review): vmid is masked before the shift; this matches the mask used on data above only if the shift is 0 - confirm */
+
+       WREG32(mmRLC_SPM_VMID, data);
+}
+
  static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
         .set_safe_mode = gfx_v8_0_set_safe_mode,
@@ -5600,7 +5640,8 @@ static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
         .resume = gfx_v8_0_rlc_resume,
         .stop = gfx_v8_0_rlc_stop,
         .reset = gfx_v8_0_rlc_reset,
-       .start = gfx_v8_0_rlc_start
+       .start = gfx_v8_0_rlc_start,
+       .update_spm_vmid = gfx_v8_0_update_spm_vmid
  };
  
  static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -6094,7 +6135,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
                 control |= INDIRECT_BUFFER_PRE_ENB(1);
  
-               if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+               if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
                         gfx_v8_0_ring_emit_de_meta(ring);
         }
  
@@ -6236,104 +6277,6 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
  }
  
-static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
-                                          bool acquire)
-{
-       struct amdgpu_device *adev = ring->adev;
-       int pipe_num, tmp, reg;
-       int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
-
-       pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
-
-       /* first me only has 2 entries, GFX and HP3D */
-       if (ring->me > 0)
-               pipe_num -= 2;
-
-       reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
-       tmp = RREG32(reg);
-       tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
-       WREG32(reg, tmp);
-}
-
-static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
-                                           struct amdgpu_ring *ring,
-                                           bool acquire)
-{
-       int i, pipe;
-       bool reserve;
-       struct amdgpu_ring *iring;
-
-       mutex_lock(&adev->gfx.pipe_reserve_mutex);
-       pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
-       if (acquire)
-               set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-       else
-               clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-
-       if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
-               /* Clear all reservations - everyone reacquires all resources */
-               for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
-                       gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
-                                                      true);
-
-               for (i = 0; i < adev->gfx.num_compute_rings; ++i)
-                       gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
-                                                      true);
-       } else {
-               /* Lower all pipes without a current reservation */
-               for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
-                       iring = &adev->gfx.gfx_ring[i];
-                       pipe = amdgpu_gfx_mec_queue_to_bit(adev,
-                                                          iring->me,
-                                                          iring->pipe,
-                                                          0);
-                       reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-                       gfx_v8_0_ring_set_pipe_percent(iring, reserve);
-               }
-
-               for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
-                       iring = &adev->gfx.compute_ring[i];
-                       pipe = amdgpu_gfx_mec_queue_to_bit(adev,
-                                                          iring->me,
-                                                          iring->pipe,
-                                                          0);
-                       reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-                       gfx_v8_0_ring_set_pipe_percent(iring, reserve);
-               }
-       }
-
-       mutex_unlock(&adev->gfx.pipe_reserve_mutex);
-}
-
-static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
-                                     struct amdgpu_ring *ring,
-                                     bool acquire)
-{
-       uint32_t pipe_priority = acquire ? 0x2 : 0x0;
-       uint32_t queue_priority = acquire ? 0xf : 0x0;
-
-       mutex_lock(&adev->srbm_mutex);
-       vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-
-       WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
-       WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
-
-       vi_srbm_select(adev, 0, 0, 0, 0);
-       mutex_unlock(&adev->srbm_mutex);
-}
-static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
-                                              enum drm_sched_priority priority)
-{
-       struct amdgpu_device *adev = ring->adev;
-       bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
-
-       if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
-               return;
-
-       gfx_v8_0_hqd_set_priority(adev, ring, acquire);
-       gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
-}
-
  static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                              u64 addr, u64 seq,
                                              unsigned flags)
@@ -6966,7 +6909,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
         .test_ib = gfx_v8_0_ring_test_ib,
         .insert_nop = amdgpu_ring_insert_nop,
         .pad_ib = amdgpu_ring_generic_pad_ib,
-       .set_priority = gfx_v8_0_ring_set_priority_compute,
         .emit_wreg = gfx_v8_0_ring_emit_wreg,
  };