drm/amdgpu: add wave limit functionality for gfx8,9
author Nirmoy Das <nirmoy.das@amd.com>
Wed, 27 Jan 2021 10:35:54 +0000 (11:35 +0100)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 9 Feb 2021 20:27:04 +0000 (15:27 -0500)
Wave limiting can be used to load balance high priority
compute jobs along with gfx jobs. When enabled, this will reserve
~75% of waves for compute jobs.

We do not need this from gfx10 onwards because gfx10 and later have
asynchronous compute tunneling, which replaces the wave limit requirement.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 2ada80c..56acec1 100644 (file)
@@ -197,6 +197,7 @@ struct amdgpu_ring_funcs {
        void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
        int (*preempt_ib)(struct amdgpu_ring *ring);
        void (*emit_mem_sync)(struct amdgpu_ring *ring);
+       void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
 };
 
 struct amdgpu_ring {
index b0284c4..bdfd29a 100644 (file)
@@ -29,6 +29,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
+#include "amdgpu_ring.h"
 #include "vi.h"
 #include "vi_structs.h"
 #include "vid.h"
@@ -6845,6 +6846,19 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
 }
 
+#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT     0x07ffffff
+static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+       uint32_t val;
+
+       /* mmSPI_WCL_PIPE_PERCENT_GFX holds a 7 bit multiplier that limits the
+        * number of gfx waves. With enable set, 0x1f (low 5 bits) keeps gfx to
+        * around 25% of gpu resources; otherwise the default value is written.
+        */
+       val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
+       amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
+}
+
 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
@@ -6928,7 +6942,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
-               7, /* gfx_v8_0_emit_mem_sync_compute */
+               7 + /* gfx_v8_0_emit_mem_sync_compute */
+               5, /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
@@ -6942,6 +6957,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
        .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
+       .emit_wave_limit = gfx_v8_0_emit_wave_limit,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
index 7b13f9d..027997e 100644 (file)
@@ -52,6 +52,7 @@
 
 #include "asic_reg/pwr/pwr_10_0_offset.h"
 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
+#include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS     1
 #define GFX9_MEC_HPD_SIZE 4096
@@ -6667,6 +6668,22 @@ static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
 }
 
+static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+       struct amdgpu_device *adev = ring->adev;
+       uint32_t val;
+
+
+       /* mmSPI_WCL_PIPE_PERCENT_GFX holds a 7 bit multiplier that limits the
+        * number of gfx waves. With enable set, 0x1f (low 5 bits) keeps gfx to
+        * around 25% of gpu resources; otherwise the default value is written.
+        */
+       val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
+       amdgpu_ring_emit_wreg(ring,
+                             SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
+                             val);
+}
+
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
        .name = "gfx_v9_0",
        .early_init = gfx_v9_0_early_init,
@@ -6756,7 +6773,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v9_0_ring_emit_vm_flush */
                8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
-               7, /* gfx_v9_0_emit_mem_sync */
+               7 + /* gfx_v9_0_emit_mem_sync */
+               5, /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
        .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
        .emit_ib = gfx_v9_0_ring_emit_ib_compute,
        .emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6772,6 +6790,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
        .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+       .emit_wave_limit = gfx_v9_0_emit_wave_limit,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {