drm/amdgpu: Implement getting the number of hops between two xgmi devices
author shaoyunl <shaoyun.liu@amd.com>
Wed, 17 Apr 2019 18:28:18 +0000 (14:28 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 24 May 2019 17:20:48 +0000 (12:20 -0500)
KFD needs to provide this info so that the upper level can determine the data path

Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

index 401edb6..8949b1a 100644 (file)
@@ -27,6 +27,7 @@
 #include "amdgpu_gfx.h"
 #include <linux/module.h>
 #include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
 
@@ -518,6 +519,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
 
        return adev->gmc.xgmi.hive_id;
 }
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{ /* Report the XGMI hop count found in dst's topology table for peer src; 0 if the lookup fails. */
+       struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; /* peer whose node_id is searched for */
+       struct amdgpu_device *adev = (struct amdgpu_device *)dst; /* device whose topology table is searched */
+       int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); /* hop count, or a negative errno */
+
+       if (ret < 0) {
+               DRM_ERROR("amdgpu: failed to get  xgmi hops count between node %d and %d. ret = %d\n",
+                       adev->gmc.xgmi.physical_node_id,
+                       peer_adev->gmc.xgmi.physical_node_id, ret);
+               ret = 0; /* the uint8_t return cannot carry an error, so a failure is reported as 0 hops */
+       }
+       return  (uint8_t)ret;
+}
 
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
 {
index ea1f141..3369017 100644 (file)
@@ -170,6 +170,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
 
 #define read_user_wptr(mmptr, wptr, dst)                               \
        ({                                                              \
index cde113f..acbc18b 100644 (file)
@@ -95,12 +95,26 @@ struct psp_funcs
        int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
 };
 
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES                64
+struct psp_xgmi_node_info { /* One entry of the XGMI topology table (element type of psp_xgmi_topology_info.nodes). */
+       uint64_t                                node_id; /* compared against gmc.xgmi.node_id when looking up a peer */
+       uint8_t                                 num_hops; /* value amdgpu_xgmi_get_hops_count() returns for this node */
+       uint8_t                                 is_sharing_enabled; /* NOTE(review): semantics not visible here - confirm */
+       enum ta_xgmi_assigned_sdma_engine       sdma_engine; /* NOTE(review): presumably the SDMA engine assigned to this node - confirm */
+};
+
+struct psp_xgmi_topology_info { /* Fixed-capacity table of XGMI nodes plus the number of entries in use. */
+       uint32_t                        num_nodes; /* upper bound of the nodes[] scan in amdgpu_xgmi_get_hops_count() */
+       struct psp_xgmi_node_info       nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; /* capacity is AMDGPU_XGMI_MAX_CONNECTED_NODES (64) */
+};
+
 struct psp_xgmi_context { /* XGMI state embedded in the PSP context (reached as adev->psp.xgmi_context). */
        uint8_t                         initialized;
        uint32_t                        session_id;
        struct amdgpu_bo                *xgmi_shared_bo;
        uint64_t                        xgmi_shared_mc_addr;
        void                            *xgmi_shared_buf;
+       struct psp_xgmi_topology_info   top_info; /* this device's own topology table; replaces the copy formerly held in amdgpu_hive_info */
 };
 
 struct psp_ras_context {
@@ -181,18 +195,6 @@ struct amdgpu_psp_funcs {
                                        enum AMDGPU_UCODE_ID);
 };
 
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES                64
-struct psp_xgmi_node_info {
-       uint64_t                                node_id;
-       uint8_t                                 num_hops;
-       uint8_t                                 is_sharing_enabled;
-       enum ta_xgmi_assigned_sdma_engine       sdma_engine;
-};
-
-struct psp_xgmi_topology_info {
-       uint32_t                        num_nodes;
-       struct psp_xgmi_node_info       nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
 
 #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
index a48c84c..04dfc8b 100644 (file)
@@ -238,7 +238,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
        /* Each psp need to set the latest topology */
        ret = psp_xgmi_set_topology_info(&adev->psp,
                                         hive->number_devices,
-                                        &hive->topology_info);
+                                        &adev->psp.xgmi_context.top_info);
        if (ret)
                dev_err(adev->dev,
                        "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +248,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
        return ret;
 }
 
+
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+               struct amdgpu_device *peer_adev)
+{ /* Find peer_adev in adev's topology table (psp.xgmi_context.top_info); return its num_hops, or -EINVAL if absent. */
+       struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+       int i; /* NOTE(review): signed index compared against the uint32_t num_nodes below */
+
+       for (i = 0 ; i < top->num_nodes; ++i) /* linear scan; NOTE(review): num_nodes is not checked against the nodes[] capacity here */
+               if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+                       return top->nodes[i].num_hops;
+       return  -EINVAL; /* no entry matched the peer's node_id */
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
-       struct psp_xgmi_topology_info *hive_topology;
+       struct psp_xgmi_topology_info *top_info;
        struct amdgpu_hive_info *hive;
        struct amdgpu_xgmi      *entry;
        struct amdgpu_device *tmp_adev = NULL;
@@ -283,16 +296,16 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
                goto exit;
        }
 
-       hive_topology = &hive->topology_info;
+       top_info = &adev->psp.xgmi_context.top_info;
 
        list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
        list_for_each_entry(entry, &hive->device_list, head)
-               hive_topology->nodes[count++].node_id = entry->node_id;
+               top_info->nodes[count++].node_id = entry->node_id;
        hive->number_devices = count;
 
        /* Each psp need to get the latest topology */
        list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
+               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info);
                if (ret) {
                        dev_err(tmp_adev->dev,
                                "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
index 3e9c91e..fbcee31 100644 (file)
@@ -27,7 +27,6 @@
 struct amdgpu_hive_info {
        uint64_t                hive_id;
        struct list_head        device_list;
-       struct psp_xgmi_topology_info   topology_info;
        int number_devices;
        struct mutex hive_lock, reset_lock;
        struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
 void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+               struct amdgpu_device *peer_adev);
 
 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
                struct amdgpu_device *bo_adev)