drm/amdgpu: gpu reset will run late_init
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
index cc8ad38..b2ba051 100644 (file)
@@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = {
        "LAST",
 };
 
+/**
+ * DOC: pcie_replay_count
+ *
+ * The amdgpu driver provides a sysfs API for reporting the total number
+ * of PCIe replays (NAKs).
+ * The file pcie_replay_count is used for this and returns the total
+ * number of replays as a sum of the NAKs generated and NAKs received.
+ */
+
+static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = ddev->dev_private;
+       uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
+}
+
+static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
+               amdgpu_device_get_pcie_replay_count, NULL);
+
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 
 /**
@@ -910,8 +932,10 @@ def_value:
  * Validates certain module parameters and updates
  * the associated values used by the driver (all asics).
  */
-static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
+static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 {
+       int ret = 0;
+
        if (amdgpu_sched_jobs < 4) {
                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
                         amdgpu_sched_jobs);
@@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
                amdgpu_vram_page_split = 1024;
        }
 
-       if (amdgpu_lockup_timeout == 0) {
-               dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
-               amdgpu_lockup_timeout = 10000;
+       ret = amdgpu_device_get_job_timeout_settings(adev);
+       if (ret) {
+               dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+               return ret;
        }
 
        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+
+       return ret;
 }
 
 /**
@@ -2473,7 +2500,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->lock_reset);
        mutex_init(&adev->virt.dpm_mutex);
 
-       amdgpu_device_check_arguments(adev);
+       r = amdgpu_device_check_arguments(adev);
+       if (r)
+               return r;
 
        spin_lock_init(&adev->mmio_idx_lock);
        spin_lock_init(&adev->smc_idx_lock);
@@ -2714,6 +2743,12 @@ fence_driver_init:
        /* must succeed. */
        amdgpu_ras_post_init(adev);
 
+       r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
+       if (r) {
+               dev_err(adev->dev, "Could not create pcie_replay_count");
+               return r;
+       }
+
        return 0;
 
 failed:
@@ -2777,6 +2812,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
        adev->rmmio = NULL;
        amdgpu_device_doorbell_fini(adev);
        amdgpu_debugfs_regs_cleanup(adev);
+       device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
 }
 
 
@@ -3458,6 +3494,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
                                if (vram_lost)
                                        amdgpu_device_fill_reset_magic(tmp_adev);
 
+                               r = amdgpu_device_ip_late_init(tmp_adev);
+                               if (r)
+                                       goto out;
+
                                /* Update PSP FW topology after reset */
                                if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
                                        r = amdgpu_xgmi_update_topology(hive, tmp_adev);