drm/amdgpu: Add sysfs interface for sdma reset mask
authorJesse.zhang@amd.com <Jesse.zhang@amd.com>
Mon, 4 Nov 2024 05:15:49 +0000 (13:15 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 8 Nov 2024 16:45:18 +0000 (11:45 -0500)
Add the sysfs interface for sdma:
sdma_reset_mask

The interface is read-only and shows the reset types supported by the IP.
For example, full adapter reset (mode1/mode2/BACO/etc),
soft reset, queue reset, and pipe reset.

v2: the sysfs node returns a text string instead of some flags (Christian)
v3: add a generic helper which takes the ring as parameter
   and print the strings in the order they are applied (Christian)

   check amdgpu_gpu_recovery before creating the sysfs file itself,
   and initialize supported_reset_types in IP version files (Lijo)

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tim Huang <tim.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c

index 5868b4a..8c89b69 100644 (file)
@@ -413,3 +413,44 @@ void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev)
                            &amdgpu_debugfs_sdma_sched_mask_fops);
 #endif
 }
+
+static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev,
+                                               struct device_attribute *attr,
+                                               char *buf)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(ddev);
+
+       if (!adev)
+               return -ENODEV;
+
+       return amdgpu_show_reset_mask(buf, adev->sdma.supported_reset);
+}
+
+static DEVICE_ATTR(sdma_reset_mask, 0444,
+                  amdgpu_get_sdma_reset_mask, NULL);
+
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+       int r = 0;
+
+       if (!amdgpu_gpu_recovery)
+               return r;
+
+       if (adev->sdma.num_instances) {
+               r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask);
+               if (r)
+                       return r;
+       }
+
+       return r;
+}
+
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+       if (!amdgpu_gpu_recovery)
+               return;
+
+       if (adev->sdma.num_instances)
+               device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+}
index a37fcd9..2db58b5 100644 (file)
@@ -116,6 +116,7 @@ struct amdgpu_sdma {
        struct ras_common_if    *ras_if;
        struct amdgpu_sdma_ras  *ras;
        uint32_t                *ip_dump;
+       uint32_t                supported_reset;
 };
 
 /*
@@ -176,4 +177,6 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
         bool duplicate);
 int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
 #endif
index 9c7cea0..a38553f 100644 (file)
@@ -1430,6 +1430,10 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
                }
        }
 
+       /* TODO: Add queue reset mask when FW fully supports it */
+       adev->sdma.supported_reset =
+               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+
        if (amdgpu_sdma_ras_sw_init(adev)) {
                dev_err(adev->dev, "fail to initialize sdma ras block\n");
                return -EINVAL;
@@ -1442,6 +1446,10 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
        else
                DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
+       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+       if (r)
+               return r;
+
        return r;
 }
 
@@ -1456,6 +1464,7 @@ static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
                        amdgpu_ring_fini(&adev->sdma.instance[i].page);
        }
 
+       amdgpu_sdma_sysfs_reset_mask_fini(adev);
        if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
            amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5))
                amdgpu_sdma_destroy_inst_ctx(adev, true);
index d31c486..fa9b409 100644 (file)
@@ -1452,6 +1452,19 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
                        return r;
        }
 
+       adev->sdma.supported_reset =
+               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+       switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+       case IP_VERSION(5, 0, 0):
+       case IP_VERSION(5, 0, 2):
+       case IP_VERSION(5, 0, 5):
+               if (adev->sdma.instance[0].fw_version >= 35)
+                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+               break;
+       default:
+               break;
+       }
+
        /* Allocate memory for SDMA IP Dump buffer */
        ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
        if (ptr)
@@ -1459,6 +1472,10 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
        else
                DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
+       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+       if (r)
+               return r;
+
        return r;
 }
 
@@ -1470,6 +1487,7 @@ static int sdma_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
        for (i = 0; i < adev->sdma.num_instances; i++)
                amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+       amdgpu_sdma_sysfs_reset_mask_fini(adev);
        amdgpu_sdma_destroy_inst_ctx(adev, false);
 
        kfree(adev->sdma.ip_dump);
index ffa8c62..ba51603 100644 (file)
@@ -1357,6 +1357,24 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
                        return r;
        }
 
+       adev->sdma.supported_reset =
+               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+       switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+       case IP_VERSION(5, 2, 0):
+       case IP_VERSION(5, 2, 2):
+       case IP_VERSION(5, 2, 3):
+       case IP_VERSION(5, 2, 4):
+               if (adev->sdma.instance[0].fw_version >= 76)
+                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+               break;
+       case IP_VERSION(5, 2, 5):
+               if (adev->sdma.instance[0].fw_version >= 34)
+                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+               break;
+       default:
+               break;
+       }
+
        /* Allocate memory for SDMA IP Dump buffer */
        ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
        if (ptr)
@@ -1364,6 +1382,10 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
        else
                DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
+       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+       if (r)
+               return r;
+
        return r;
 }
 
@@ -1375,6 +1397,7 @@ static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
        for (i = 0; i < adev->sdma.num_instances; i++)
                amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+       amdgpu_sdma_sysfs_reset_mask_fini(adev);
        amdgpu_sdma_destroy_inst_ctx(adev, true);
 
        kfree(adev->sdma.ip_dump);
index 5635f2d..d46128b 100644 (file)
@@ -1350,6 +1350,19 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
                        return r;
        }
 
+       adev->sdma.supported_reset =
+               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+       switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+       case IP_VERSION(6, 0, 0):
+       case IP_VERSION(6, 0, 2):
+       case IP_VERSION(6, 0, 3):
+               if (adev->sdma.instance[0].fw_version >= 21)
+                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+               break;
+       default:
+               break;
+       }
+
        if (amdgpu_sdma_ras_sw_init(adev)) {
                dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
                return -EINVAL;
@@ -1362,6 +1375,10 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
        else
                DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
+       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+       if (r)
+               return r;
+
        return r;
 }
 
@@ -1373,6 +1390,7 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
        for (i = 0; i < adev->sdma.num_instances; i++)
                amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+       amdgpu_sdma_sysfs_reset_mask_fini(adev);
        amdgpu_sdma_destroy_inst_ctx(adev, true);
 
        kfree(adev->sdma.ip_dump);