drm/amdgpu: Remap hdp coherency registers
authorOak Zeng <Oak.Zeng@amd.com>
Thu, 4 Apr 2019 20:47:34 +0000 (15:47 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 24 May 2019 17:20:47 +0000 (12:20 -0500)
Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL
to an empty page in mmio space. We will later map this page to process
space so application can flush hdp. This can't be done properly at
those registers' original location because it will expose more than
desired registers to process space.

v2: Use explicit register hole location
v3: Moved remapped hdp registers into adev struct
v4: Use more generic name for remapped page
    Expose register offset in kfd_ioctl.h
v5: Move hdp register remap function to nbio ip function
v6: Fixed operator precedence issue and other bugs

Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
drivers/gpu/drm/amd/amdgpu/soc15.c
include/uapi/linux/kfd_ioctl.h

index 14398f5..23c3375 100644 (file)
@@ -639,6 +639,11 @@ struct nbio_hdp_flush_reg {
        u32 ref_and_mask_sdma1;
 };
 
+struct amdgpu_mmio_remap {
+       u32 reg_offset;
+       resource_size_t bus_addr;
+};
+
 struct amdgpu_nbio_funcs {
        const struct nbio_hdp_flush_reg *hdp_flush_reg;
        u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
@@ -666,6 +671,7 @@ struct amdgpu_nbio_funcs {
        void (*ih_control)(struct amdgpu_device *adev);
        void (*init_registers)(struct amdgpu_device *adev);
        void (*detect_hw_virt)(struct amdgpu_device *adev);
+       void (*remap_hdp_registers)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_df_funcs {
@@ -764,6 +770,7 @@ struct amdgpu_device {
        void __iomem                    *rmmio;
        /* protects concurrent MM_INDEX/DATA based register access */
        spinlock_t mmio_idx_lock;
+       struct amdgpu_mmio_remap        rmmio_remap;
        /* protects concurrent SMC based register access */
        spinlock_t smc_idx_lock;
        amdgpu_rreg_t                   smc_rreg;
index 1cdb98a..73419fa 100644 (file)
 #include "nbio/nbio_7_0_sh_mask.h"
 #include "nbio/nbio_7_0_smn.h"
 #include "vega10_enum.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
 
+static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
+{
+       WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+               adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+       WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+               adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
 static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
 {
         u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
                                struct amdgpu_ring *ring)
 {
        if (!ring || !ring->funcs->emit_wreg)
-               WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+               WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
        else
-               amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
-                       NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+               amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 }
 
 static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
        .ih_control = nbio_v7_0_ih_control,
        .init_registers = nbio_v7_0_init_registers,
        .detect_hw_virt = nbio_v7_0_detect_hw_virt,
+       .remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
 };
index c69d515..bfaaa32 100644 (file)
 #include "nbio/nbio_7_4_offset.h"
 #include "nbio/nbio_7_4_sh_mask.h"
 #include "nbio/nbio_7_4_0_smn.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
 
+static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
+{
+       WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+               adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+       WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+               adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
 static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
 {
        u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
                                struct amdgpu_ring *ring)
 {
        if (!ring || !ring->funcs->emit_wreg)
-               WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+               WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
        else
-               amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
-                       NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+               amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 }
 
 static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
@@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
        .ih_control = nbio_v7_4_ih_control,
        .init_registers = nbio_v7_4_init_registers,
        .detect_hw_virt = nbio_v7_4_detect_hw_virt,
+       .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
 };
index 4900e49..78bd00a 100644 (file)
@@ -44,6 +44,7 @@
 #include "smuio/smuio_9_0_offset.h"
 #include "smuio/smuio_9_0_sh_mask.h"
 #include "nbio/nbio_7_0_default.h"
+#include "nbio/nbio_7_0_offset.h"
 #include "nbio/nbio_7_0_sh_mask.h"
 #include "nbio/nbio_7_0_smn.h"
 #include "mp/mp_9_0_offset.h"
@@ -64,6 +65,7 @@
 #include "dce_virtual.h"
 #include "mxgpu_ai.h"
 #include "amdgpu_smu.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 #define mmMP0_MISC_CGTT_CTRL0                                                                   0x01b9
 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX                                                          0
@@ -783,8 +785,11 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
 
 static int soc15_common_early_init(void *handle)
 {
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+       adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
        adev->smc_rreg = NULL;
        adev->smc_wreg = NULL;
        adev->pcie_rreg = &soc15_pcie_rreg;
@@ -1014,6 +1019,12 @@ static int soc15_common_hw_init(void *handle)
        soc15_program_aspm(adev);
        /* setup nbio registers */
        adev->nbio_funcs->init_registers(adev);
+       /* remap HDP registers to a hole in mmio space,
+        * for the purpose of expose those registers
+        * to process space
+        */
+       if (adev->nbio_funcs->remap_hdp_registers)
+               adev->nbio_funcs->remap_hdp_registers(adev);
        /* enable the doorbell aperture */
        soc15_enable_doorbell_aperture(adev, true);
        /* HW doorbell routing policy: doorbell writing not
index dc067ed..bb1b428 100644 (file)
@@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args {
        __u32 dmabuf_fd;        /* to KFD */
 };
 
+/* Register offset inside the remapped mmio page
+ */
+enum kfd_mmio_remap {
+       KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0,
+       KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)                  _IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)           _IOR(AMDKFD_IOCTL_BASE, nr, type)