2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
24 * Authors: Dave Airlie
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/devcoredump.h>
36 #include <generated/utsrelease.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
40 #include <drm/drm_aperture.h>
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_crtc_helper.h>
43 #include <drm/drm_fb_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/vgaarb.h>
47 #include <linux/vga_switcheroo.h>
48 #include <linux/efi.h>
50 #include "amdgpu_trace.h"
51 #include "amdgpu_i2c.h"
53 #include "amdgpu_atombios.h"
54 #include "amdgpu_atomfirmware.h"
56 #ifdef CONFIG_DRM_AMDGPU_SI
59 #ifdef CONFIG_DRM_AMDGPU_CIK
65 #include "bif/bif_4_1_d.h"
66 #include <linux/firmware.h>
67 #include "amdgpu_vf_error.h"
69 #include "amdgpu_amdkfd.h"
70 #include "amdgpu_pm.h"
72 #include "amdgpu_xgmi.h"
73 #include "amdgpu_ras.h"
74 #include "amdgpu_pmu.h"
75 #include "amdgpu_fru_eeprom.h"
76 #include "amdgpu_reset.h"
78 #include <linux/suspend.h>
79 #include <drm/task_barrier.h>
80 #include <linux/pm_runtime.h>
82 #include <drm/drm_drv.h>
84 #if IS_ENABLED(CONFIG_X86)
85 #include <asm/intel-family.h>
88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96 #define AMDGPU_RESUME_MS 2000
97 #define AMDGPU_MAX_RETRY_LIMIT 2
98 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
100 static const struct drm_driver amdgpu_kms_driver;
102 const char *amdgpu_asic_name[] = {
144 * DOC: pcie_replay_count
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs).
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as the sum of the NAKs generated and the NAKs received.
152 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
155 struct drm_device *ddev = dev_get_drvdata(dev);
156 struct amdgpu_device *adev = drm_to_adev(ddev);
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
159 return sysfs_emit(buf, "%llu\n", cnt);
162 static DEVICE_ATTR(pcie_replay_count, 0444,
163 amdgpu_device_get_pcie_replay_count, NULL);
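/*
 * A minimal userspace sketch (not part of the driver) showing how the
 * attribute above can be consumed; the card index in the sysfs path is an
 * assumption and varies per system.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned long long cnt;
 *		FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *
 *		if (!f)
 *			return 1;
 *		if (fscanf(f, "%llu", &cnt) == 1)
 *			printf("PCIe replays: %llu\n", cnt);
 *		fclose(f);
 *		return 0;
 *	}
 */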
165 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
169 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
171 * @dev: drm_device pointer
173 * Returns true if the device is a dGPU with ATPX power control,
174 * otherwise returns false.
176 bool amdgpu_device_supports_px(struct drm_device *dev)
178 struct amdgpu_device *adev = drm_to_adev(dev);
180 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
186 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
188 * @dev: drm_device pointer
190 * Returns true if the device is a dGPU with ACPI power control,
191 * otherwise returns false.
193 bool amdgpu_device_supports_boco(struct drm_device *dev)
195 struct amdgpu_device *adev = drm_to_adev(dev);
198 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
204 * amdgpu_device_supports_baco - Does the device support BACO
206 * @dev: drm_device pointer
208 * Returns true if the device supports BACO,
209 * otherwise returns false.
211 bool amdgpu_device_supports_baco(struct drm_device *dev)
213 struct amdgpu_device *adev = drm_to_adev(dev);
215 return amdgpu_asic_supports_baco(adev);
219 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
220 * Smart Shift support
222 * @dev: drm_device pointer
224 * Returns true if the device is a dGPU with Smart Shift support,
225 * otherwise returns false.
227 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
229 return (amdgpu_device_supports_boco(dev) &&
230 amdgpu_acpi_is_power_shift_control_supported());
234 * VRAM access helper functions
238 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
240 * @adev: amdgpu_device pointer
241 * @pos: offset of the buffer in vram
242 * @buf: virtual address of the buffer in system memory
243 * @size: read/write size; the buffer at @buf must be at least @size bytes
244 * @write: true - write to vram, otherwise - read from vram
246 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
247 void *buf, size_t size, bool write)
250 uint32_t hi = ~0, tmp = 0;
251 uint32_t *data = buf;
255 if (!drm_dev_enter(adev_to_drm(adev), &idx))
258 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
260 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
261 for (last = pos + size; pos < last; pos += 4) {
264 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
266 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
270 WREG32_NO_KIQ(mmMM_DATA, *data++);
272 *data++ = RREG32_NO_KIQ(mmMM_DATA);
275 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
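/*
 * A usage sketch for the helper above: reading a single dword from VRAM
 * offset 0 through the MM_INDEX/MM_DATA window. Both the offset and the
 * size must be dword aligned, as enforced by the BUG_ON above.
 *
 *	u32 val;
 *
 *	amdgpu_device_mm_access(adev, 0, &val, sizeof(val), false);
 *	dev_info(adev->dev, "VRAM dword at offset 0: 0x%08x\n", val);
 */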
280 * amdgpu_device_aper_access - access vram through the vram aperture
282 * @adev: amdgpu_device pointer
283 * @pos: offset of the buffer in vram
284 * @buf: virtual address of the buffer in system memory
285 * @size: read/write size; the buffer at @buf must be at least @size bytes
286 * @write: true - write to vram, otherwise - read from vram
288 * Returns the number of bytes transferred.
290 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
291 void *buf, size_t size, bool write)
298 if (!adev->mman.aper_base_kaddr)
301 last = min(pos + size, adev->gmc.visible_vram_size);
303 addr = adev->mman.aper_base_kaddr + pos;
307 memcpy_toio(addr, buf, count);
308 /* Make sure the HDP write cache flush happens without any reordering
309 * after the system memory contents are sent over PCIe to the device
312 amdgpu_device_flush_hdp(adev, NULL);
314 amdgpu_device_invalidate_hdp(adev, NULL);
315 /* Make sure HDP read cache is invalidated before issuing a read
319 memcpy_fromio(buf, addr, count);
331 * amdgpu_device_vram_access - read/write a buffer in vram
333 * @adev: amdgpu_device pointer
334 * @pos: offset of the buffer in vram
335 * @buf: virtual address of the buffer in system memory
336 * @size: read/write size; the buffer at @buf must be at least @size bytes
337 * @write: true - write to vram, otherwise - read from vram
339 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
340 void *buf, size_t size, bool write)
344 /* try using the vram aperture to access vram first */
345 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
348 /* use MM_INDEX/MM_DATA to access the rest of vram */
351 amdgpu_device_mm_access(adev, pos, buf, size, write);
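/*
 * Illustrative caller, with vram_offset being a placeholder: the helper
 * first copies through the CPU-visible aperture and transparently falls
 * back to MM_INDEX/MM_DATA for any remainder beyond visible VRAM.
 *
 *	u32 pattern[4] = { 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef };
 *
 *	amdgpu_device_vram_access(adev, vram_offset, pattern,
 *				  sizeof(pattern), true);
 */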
356 * register access helper functions.
359 /* Check if hw access should be skipped because of hotplug or device error */
360 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
362 if (adev->no_hw_access)
365 #ifdef CONFIG_LOCKDEP
367 * This is a bit complicated to understand, so worth a comment. What we assert
368 * here is that the GPU reset is not running on another thread in parallel.
370 * For this we trylock the read side of the reset semaphore; if that succeeds
371 * we know that the reset is not running in parallel.
373 * If the trylock fails we assert that we are either already holding the read
374 * side of the lock or are the reset thread itself and hold the write side of
378 if (down_read_trylock(&adev->reset_domain->sem))
379 up_read(&adev->reset_domain->sem);
381 lockdep_assert_held(&adev->reset_domain->sem);
388 * amdgpu_device_rreg - read a memory mapped IO or indirect register
390 * @adev: amdgpu_device pointer
391 * @reg: dword aligned register offset
392 * @acc_flags: access flags which require special behavior
394 * Returns the 32 bit value from the offset specified.
396 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
397 uint32_t reg, uint32_t acc_flags)
401 if (amdgpu_device_skip_hw_access(adev))
404 if ((reg * 4) < adev->rmmio_size) {
405 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
406 amdgpu_sriov_runtime(adev) &&
407 down_read_trylock(&adev->reset_domain->sem)) {
408 ret = amdgpu_kiq_rreg(adev, reg);
409 up_read(&adev->reset_domain->sem);
411 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
414 ret = adev->pcie_rreg(adev, reg * 4);
417 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
423 * MMIO register read with byte offset helper functions
424 * @offset: byte offset from MMIO start
428 * amdgpu_mm_rreg8 - read a memory mapped IO register
430 * @adev: amdgpu_device pointer
431 * @offset: byte aligned register offset
433 * Returns the 8 bit value from the offset specified.
435 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
437 if (amdgpu_device_skip_hw_access(adev))
440 if (offset < adev->rmmio_size)
441 return (readb(adev->rmmio + offset));
446 * MMIO register write with byte offset helper functions
447 * @offset: byte offset from MMIO start
448 * @value: the value to be written to the register
452 * amdgpu_mm_wreg8 - write a memory mapped IO register
454 * @adev: amdgpu_device pointer
455 * @offset: byte aligned register offset
456 * @value: 8 bit value to write
458 * Writes the value specified to the offset specified.
460 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
462 if (amdgpu_device_skip_hw_access(adev))
465 if (offset < adev->rmmio_size)
466 writeb(value, adev->rmmio + offset);
472 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
474 * @adev: amdgpu_device pointer
475 * @reg: dword aligned register offset
476 * @v: 32 bit value to write to the register
477 * @acc_flags: access flags which require special behavior
479 * Writes the value specified to the offset specified.
481 void amdgpu_device_wreg(struct amdgpu_device *adev,
482 uint32_t reg, uint32_t v,
485 if (amdgpu_device_skip_hw_access(adev))
488 if ((reg * 4) < adev->rmmio_size) {
489 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
490 amdgpu_sriov_runtime(adev) &&
491 down_read_trylock(&adev->reset_domain->sem)) {
492 amdgpu_kiq_wreg(adev, reg, v);
493 up_read(&adev->reset_domain->sem);
495 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
498 adev->pcie_wreg(adev, reg * 4, v);
501 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
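/*
 * A read-modify-write sketch built on the two helpers above; reg_offset,
 * field_mask and field_value are placeholders, not real amdgpu registers.
 * Passing acc_flags of 0 selects the default path (KIQ under SR-IOV
 * runtime, plain MMIO otherwise).
 *
 *	u32 v = amdgpu_device_rreg(adev, reg_offset, 0);
 *
 *	v &= ~field_mask;
 *	v |= field_value & field_mask;
 *	amdgpu_device_wreg(adev, reg_offset, v, 0);
 */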
505 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
507 * @adev: amdgpu_device pointer
508 * @reg: mmio/rlc register
511 * This function is invoked only for debugfs register access.
513 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
514 uint32_t reg, uint32_t v,
517 if (amdgpu_device_skip_hw_access(adev))
520 if (amdgpu_sriov_fullaccess(adev) &&
521 adev->gfx.rlc.funcs &&
522 adev->gfx.rlc.funcs->is_rlcg_access_range) {
523 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
524 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
525 } else if ((reg * 4) >= adev->rmmio_size) {
526 adev->pcie_wreg(adev, reg * 4, v);
528 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
533 * amdgpu_device_indirect_rreg - read an indirect register
535 * @adev: amdgpu_device pointer
536 * @reg_addr: indirect register address to read from
538 * Returns the value of indirect register @reg_addr
540 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
543 unsigned long flags, pcie_index, pcie_data;
544 void __iomem *pcie_index_offset;
545 void __iomem *pcie_data_offset;
548 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
549 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
551 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
552 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
553 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
555 writel(reg_addr, pcie_index_offset);
556 readl(pcie_index_offset);
557 r = readl(pcie_data_offset);
558 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
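/*
 * The sequence above is the classic index/data indirect access pattern:
 * write the target address to the index register, read the index register
 * back to flush the posted write, then access the data register. A
 * hypothetical caller (ixSOME_REG is a placeholder) is simply:
 *
 *	u32 val = amdgpu_device_indirect_rreg(adev, ixSOME_REG);
 */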
563 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
566 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
568 void __iomem *pcie_index_offset;
569 void __iomem *pcie_index_hi_offset;
570 void __iomem *pcie_data_offset;
572 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
573 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
574 if (adev->nbio.funcs->get_pcie_index_hi_offset)
575 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
579 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
580 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
581 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
582 if (pcie_index_hi != 0)
583 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
586 writel(reg_addr, pcie_index_offset);
587 readl(pcie_index_offset);
588 if (pcie_index_hi != 0) {
589 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
590 readl(pcie_index_hi_offset);
592 r = readl(pcie_data_offset);
594 /* clear the high bits */
595 if (pcie_index_hi != 0) {
596 writel(0, pcie_index_hi_offset);
597 readl(pcie_index_hi_offset);
600 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
606 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
608 * @adev: amdgpu_device pointer
609 * @reg_addr: indirect register address to read from
611 * Returns the value of indirect register @reg_addr
613 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
616 unsigned long flags, pcie_index, pcie_data;
617 void __iomem *pcie_index_offset;
618 void __iomem *pcie_data_offset;
621 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
622 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
624 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
625 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
626 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
628 /* read low 32 bits */
629 writel(reg_addr, pcie_index_offset);
630 readl(pcie_index_offset);
631 r = readl(pcie_data_offset);
632 /* read high 32 bits */
633 writel(reg_addr + 4, pcie_index_offset);
634 readl(pcie_index_offset);
635 r |= ((u64)readl(pcie_data_offset) << 32);
636 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
642 * amdgpu_device_indirect_wreg - write an indirect register
644 * @adev: amdgpu_device pointer
645 * @reg_addr: indirect register offset
646 * @reg_data: indirect register data
649 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
650 u32 reg_addr, u32 reg_data)
652 unsigned long flags, pcie_index, pcie_data;
653 void __iomem *pcie_index_offset;
654 void __iomem *pcie_data_offset;
656 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
657 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
659 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
660 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
661 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
663 writel(reg_addr, pcie_index_offset);
664 readl(pcie_index_offset);
665 writel(reg_data, pcie_data_offset);
666 readl(pcie_data_offset);
667 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
670 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
671 u64 reg_addr, u32 reg_data)
673 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
674 void __iomem *pcie_index_offset;
675 void __iomem *pcie_index_hi_offset;
676 void __iomem *pcie_data_offset;
678 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
679 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
680 if (adev->nbio.funcs->get_pcie_index_hi_offset)
681 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
685 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
686 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
687 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
688 if (pcie_index_hi != 0)
689 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
692 writel(reg_addr, pcie_index_offset);
693 readl(pcie_index_offset);
694 if (pcie_index_hi != 0) {
695 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
696 readl(pcie_index_hi_offset);
698 writel(reg_data, pcie_data_offset);
699 readl(pcie_data_offset);
701 /* clear the high bits */
702 if (pcie_index_hi != 0) {
703 writel(0, pcie_index_hi_offset);
704 readl(pcie_index_hi_offset);
707 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
711 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
713 * @adev: amdgpu_device pointer
714 * @reg_addr: indirect register offset
715 * @reg_data: indirect register data
718 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
719 u32 reg_addr, u64 reg_data)
721 unsigned long flags, pcie_index, pcie_data;
722 void __iomem *pcie_index_offset;
723 void __iomem *pcie_data_offset;
725 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
726 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
728 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
729 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
730 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
732 /* write low 32 bits */
733 writel(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset);
735 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
736 readl(pcie_data_offset);
737 /* write high 32 bits */
738 writel(reg_addr + 4, pcie_index_offset);
739 readl(pcie_index_offset);
740 writel((u32)(reg_data >> 32), pcie_data_offset);
741 readl(pcie_data_offset);
742 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
746 * amdgpu_device_get_rev_id - query device rev_id
748 * @adev: amdgpu_device pointer
750 * Returns the device rev_id.
752 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
754 return adev->nbio.funcs->get_rev_id(adev);
758 * amdgpu_invalid_rreg - dummy reg read function
760 * @adev: amdgpu_device pointer
761 * @reg: offset of register
763 * Dummy register read function. Used for register blocks
764 * that certain asics don't have (all asics).
765 * Returns the value in the register.
767 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
769 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
774 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
776 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
782 * amdgpu_invalid_wreg - dummy reg write function
784 * @adev: amdgpu_device pointer
785 * @reg: offset of register
786 * @v: value to write to the register
788 * Dummy register write function. Used for register blocks
789 * that certain asics don't have (all asics).
791 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
793 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
798 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
800 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
806 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
808 * @adev: amdgpu_device pointer
809 * @reg: offset of register
811 * Dummy register read function. Used for register blocks
812 * that certain asics don't have (all asics).
813 * Returns the value in the register.
815 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
817 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
823 * amdgpu_invalid_wreg64 - dummy reg write function
825 * @adev: amdgpu_device pointer
826 * @reg: offset of register
827 * @v: value to write to the register
829 * Dummy register write function. Used for register blocks
830 * that certain asics don't have (all asics).
832 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
834 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
840 * amdgpu_block_invalid_rreg - dummy reg read function
842 * @adev: amdgpu_device pointer
843 * @block: offset of instance
844 * @reg: offset of register
846 * Dummy register read function. Used for register blocks
847 * that certain asics don't have (all asics).
848 * Returns the value in the register.
850 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
851 uint32_t block, uint32_t reg)
853 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
860 * amdgpu_block_invalid_wreg - dummy reg write function
862 * @adev: amdgpu_device pointer
863 * @block: offset of instance
864 * @reg: offset of register
865 * @v: value to write to the register
867 * Dummy register write function. Used for register blocks
868 * that certain asics don't have (all asics).
870 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
872 uint32_t reg, uint32_t v)
874 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
880 * amdgpu_device_asic_init - Wrapper for atom asic_init
882 * @adev: amdgpu_device pointer
884 * Does any asic specific work and then calls atom asic init.
886 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
888 amdgpu_asic_pre_asic_init(adev);
890 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
891 adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
892 return amdgpu_atomfirmware_asic_init(adev, true);
894 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
898 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
900 * @adev: amdgpu_device pointer
902 * Allocates a scratch page of VRAM for use by various things in the
905 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
907 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
908 AMDGPU_GEM_DOMAIN_VRAM |
909 AMDGPU_GEM_DOMAIN_GTT,
910 &adev->mem_scratch.robj,
911 &adev->mem_scratch.gpu_addr,
912 (void **)&adev->mem_scratch.ptr);
916 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
918 * @adev: amdgpu_device pointer
920 * Frees the VRAM scratch page.
922 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
924 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
928 * amdgpu_device_program_register_sequence - program an array of registers.
930 * @adev: amdgpu_device pointer
931 * @registers: pointer to the register array
932 * @array_size: size of the register array
934 * Programs an array of registers with AND and OR masks.
935 * This is a helper for setting golden registers.
937 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
938 const u32 *registers,
939 const u32 array_size)
941 u32 tmp, reg, and_mask, or_mask;
947 for (i = 0; i < array_size; i += 3) {
948 reg = registers[i + 0];
949 and_mask = registers[i + 1];
950 or_mask = registers[i + 2];
952 if (and_mask == 0xffffffff) {
957 if (adev->family >= AMDGPU_FAMILY_AI)
958 tmp |= (or_mask & and_mask);
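/*
 * A sketch of the expected array layout: triplets of {offset, AND mask,
 * OR mask}. An AND mask of 0xffffffff overwrites the register with the OR
 * mask outright; otherwise the bits covered by the AND mask are cleared
 * and (on AMDGPU_FAMILY_AI and newer, per the check above) replaced from
 * the OR mask. The register names below are placeholders.
 *
 *	static const u32 example_golden_regs[] = {
 *		mmSOME_REG,  0xffffffff, 0x00000001,
 *		mmOTHER_REG, 0x0000ff00, 0x00001200,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *						ARRAY_SIZE(example_golden_regs));
 */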
967 * amdgpu_device_pci_config_reset - reset the GPU
969 * @adev: amdgpu_device pointer
971 * Resets the GPU using the pci config reset sequence.
972 * Only applicable to asics prior to vega10.
974 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
976 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
980 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
982 * @adev: amdgpu_device pointer
984 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
986 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
988 return pci_reset_function(adev->pdev);
992 * amdgpu_device_wb_*()
993 * Writeback is the method by which the GPU updates special pages in memory
994 * with the status of certain GPU events (fences, ring pointers, etc.).
998 * amdgpu_device_wb_fini - Disable Writeback and free memory
1000 * @adev: amdgpu_device pointer
1002 * Disables Writeback and frees the Writeback memory (all asics).
1003 * Used at driver shutdown.
1005 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1007 if (adev->wb.wb_obj) {
1008 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1010 (void **)&adev->wb.wb);
1011 adev->wb.wb_obj = NULL;
1016 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1018 * @adev: amdgpu_device pointer
1020 * Initializes writeback and allocates writeback memory (all asics).
1021 * Used at driver startup.
1022 * Returns 0 on success or a negative error code on failure.
1024 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1028 if (adev->wb.wb_obj == NULL) {
1029 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1030 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1031 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1032 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1033 (void **)&adev->wb.wb);
1035 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1039 adev->wb.num_wb = AMDGPU_MAX_WB;
1040 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1042 /* clear wb memory */
1043 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1050 * amdgpu_device_wb_get - Allocate a wb entry
1052 * @adev: amdgpu_device pointer
1055 * Allocate a wb slot for use by the driver (all asics).
1056 * Returns 0 on success or -EINVAL on failure.
1058 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1060 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1062 if (offset < adev->wb.num_wb) {
1063 __set_bit(offset, adev->wb.used);
1064 *wb = offset << 3; /* convert to dw offset */
1072 * amdgpu_device_wb_free - Free a wb entry
1074 * @adev: amdgpu_device pointer
1077 * Free a wb slot allocated for use by the driver (all asics)
1079 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1082 if (wb < adev->wb.num_wb)
1083 __clear_bit(wb, adev->wb.used);
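/*
 * A usage sketch, assuming the pattern used elsewhere in the driver: the
 * returned value is a dword offset into the writeback page, from which
 * both the CPU and GPU addresses of the slot can be derived.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		volatile u32 *cpu_addr = &adev->wb.wb[wb];
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *
 *		*cpu_addr = 0;	// e.g. clear before handing gpu_addr to the GPU
 *		...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */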
1087 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1089 * @adev: amdgpu_device pointer
1091 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1092 * to fail, but if any of the BARs is not accessible after the resize we abort
1093 * driver loading by returning -ENODEV.
1095 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1097 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1098 struct pci_bus *root;
1099 struct resource *res;
1104 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1108 if (amdgpu_sriov_vf(adev))
1111 /* skip if the bios has already enabled large BAR */
1112 if (adev->gmc.real_vram_size &&
1113 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1116 /* Check if the root bus has 64-bit memory resources */
1117 root = adev->pdev->bus;
1118 while (root->parent)
1119 root = root->parent;
1121 pci_bus_for_each_resource(root, res, i) {
1122 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1123 res->start > 0x100000000ull)
1127 /* Trying to resize is pointless without a root hub window above 4GB */
1131 /* Limit the BAR size to what is available */
1132 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1135 /* Disable memory decoding while we change the BAR addresses and size */
1136 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1137 pci_write_config_word(adev->pdev, PCI_COMMAND,
1138 cmd & ~PCI_COMMAND_MEMORY);
1140 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1141 amdgpu_doorbell_fini(adev);
1142 if (adev->asic_type >= CHIP_BONAIRE)
1143 pci_release_resource(adev->pdev, 2);
1145 pci_release_resource(adev->pdev, 0);
1147 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1149 DRM_INFO("Not enough PCI address space for a large BAR.");
1150 else if (r && r != -ENOTSUPP)
1151 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1153 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1155 /* When the doorbell or fb BAR isn't available we have no chance of
1158 r = amdgpu_doorbell_init(adev);
1159 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1162 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1167 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1169 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1176 * GPU helpers function.
1179 * amdgpu_device_need_post - check whether the hw needs to be posted
1181 * @adev: amdgpu_device pointer
1183 * Check if the asic has been initialized (all asics) at driver startup,
1184 * or whether a post is needed after a hw reset.
1185 * Returns true if post is needed, false if not.
1187 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1191 if (amdgpu_sriov_vf(adev))
1194 if (!amdgpu_device_read_bios(adev))
1197 if (amdgpu_passthrough(adev)) {
1198 /* for FIJI: In the whole GPU pass-through virtualization case, after a VM
1199 * reboot some old SMC firmware still needs the driver to do a vPost, otherwise
1200 * the gpu hangs. SMC firmware versions above 22.15 don't have this flaw, so we
1201 * force vPost for SMC versions below 22.15
1203 if (adev->asic_type == CHIP_FIJI) {
1207 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1208 /* force vPost if an error occurred */
1212 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1213 if (fw_ver < 0x00160e00)
1218 /* Don't post if we need to reset whole hive on init */
1219 if (adev->gmc.xgmi.pending_reset)
1222 if (adev->has_hw_reset) {
1223 adev->has_hw_reset = false;
1227 /* bios scratch used on CIK+ */
1228 if (adev->asic_type >= CHIP_BONAIRE)
1229 return amdgpu_atombios_scratch_need_asic_init(adev);
1231 /* check MEM_SIZE for older asics */
1232 reg = amdgpu_asic_get_config_memsize(adev);
1234 if ((reg != 0) && (reg != 0xffffffff))
1241 * On APUs in systems with >= 64GB of memory, white flickering has been observed with S/G enabled.
1242 * Disable S/G on such systems until we have a proper fix.
1243 * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
1244 * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
1246 bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
1248 switch (amdgpu_sg_display) {
1258 if ((totalram_pages() << (PAGE_SHIFT - 10)) +
1259 (adev->gmc.real_vram_size / 1024) >= 64000000) {
1260 DRM_WARN("Disabling S/G due to >=64GB RAM\n");
1267 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1268 * speed switching. Until we have confirmation from Intel that a specific host
1269 * supports it, it's safer that we keep it disabled for all.
1271 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1272 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1274 bool amdgpu_device_pcie_dynamic_switching_supported(void)
1276 #if IS_ENABLED(CONFIG_X86)
1277 struct cpuinfo_x86 *c = &cpu_data(0);
1279 if (c->x86_vendor == X86_VENDOR_INTEL)
1286 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1288 * @adev: amdgpu_device pointer
1290 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1291 * be set for this device.
1293 * Returns true if it should be used or false if not.
1295 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1297 switch (amdgpu_aspm) {
1307 return pcie_aspm_enabled(adev->pdev);
1310 bool amdgpu_device_aspm_support_quirk(void)
1312 #if IS_ENABLED(CONFIG_X86)
1313 struct cpuinfo_x86 *c = &cpu_data(0);
1315 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1321 /* if we get transitioned to only one device, take VGA back */
1323 * amdgpu_device_vga_set_decode - enable/disable vga decode
1325 * @pdev: PCI device pointer
1326 * @state: enable/disable vga decode
1328 * Enable/disable vga decode (all asics).
1329 * Returns VGA resource flags.
1331 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1334 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1336 amdgpu_asic_set_vga_state(adev, state);
1338 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1339 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1341 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1345 * amdgpu_device_check_block_size - validate the vm block size
1347 * @adev: amdgpu_device pointer
1349 * Validates the vm block size specified via module parameter.
1350 * The vm block size defines the number of bits in the page table versus the
1351 * page directory; a page is 4KB, so we have 12 bits of offset, a minimum of
1352 * 9 bits in the page table, and the remaining bits are in the page directory.
1354 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1356 /* defines the number of bits in the page table versus the page directory,
1357 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1358 * page table and the remaining bits are in the page directory
1360 if (amdgpu_vm_block_size == -1)
1363 if (amdgpu_vm_block_size < 9) {
1364 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1365 amdgpu_vm_block_size);
1366 amdgpu_vm_block_size = -1;
1371 * amdgpu_device_check_vm_size - validate the vm size
1373 * @adev: amdgpu_device pointer
1375 * Validates the vm size in GB specified via module parameter.
1376 * The VM size is the size of the GPU virtual memory space in GB.
1378 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1380 /* no need to check the default value */
1381 if (amdgpu_vm_size == -1)
1384 if (amdgpu_vm_size < 1) {
1385 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1387 amdgpu_vm_size = -1;
1391 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1394 bool is_os_64 = (sizeof(void *) == 8);
1395 uint64_t total_memory;
1396 uint64_t dram_size_seven_GB = 0x1B8000000;
1397 uint64_t dram_size_three_GB = 0xB8000000;
1399 if (amdgpu_smu_memory_pool_size == 0)
1403 DRM_WARN("Not 64-bit OS, feature not supported\n");
1407 total_memory = (uint64_t)si.totalram * si.mem_unit;
1409 if ((amdgpu_smu_memory_pool_size == 1) ||
1410 (amdgpu_smu_memory_pool_size == 2)) {
1411 if (total_memory < dram_size_three_GB)
1413 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1414 (amdgpu_smu_memory_pool_size == 8)) {
1415 if (total_memory < dram_size_seven_GB)
1418 DRM_WARN("Smu memory pool size not supported\n");
1421 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1426 DRM_WARN("Not enough system memory\n");
1428 adev->pm.smu_prv_buffer_size = 0;
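/*
 * The shift above encodes the pool size in 256 MiB units: a module
 * parameter value of n yields n << 28 bytes, i.e. 1 -> 256 MiB,
 * 2 -> 512 MiB, 4 -> 1 GiB and 8 -> 2 GiB, matching the values accepted
 * by the checks above.
 */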
1431 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1433 if (!(adev->flags & AMD_IS_APU) ||
1434 adev->asic_type < CHIP_RAVEN)
1437 switch (adev->asic_type) {
1439 if (adev->pdev->device == 0x15dd)
1440 adev->apu_flags |= AMD_APU_IS_RAVEN;
1441 if (adev->pdev->device == 0x15d8)
1442 adev->apu_flags |= AMD_APU_IS_PICASSO;
1445 if ((adev->pdev->device == 0x1636) ||
1446 (adev->pdev->device == 0x164c))
1447 adev->apu_flags |= AMD_APU_IS_RENOIR;
1449 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1452 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1454 case CHIP_YELLOW_CARP:
1456 case CHIP_CYAN_SKILLFISH:
1457 if ((adev->pdev->device == 0x13FE) ||
1458 (adev->pdev->device == 0x143F))
1459 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1469 * amdgpu_device_check_arguments - validate module params
1471 * @adev: amdgpu_device pointer
1473 * Validates certain module parameters and updates
1474 * the associated values used by the driver (all asics).
1476 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1478 if (amdgpu_sched_jobs < 4) {
1479 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1481 amdgpu_sched_jobs = 4;
1482 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
1483 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1485 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1488 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1489 /* gart size must be greater than or equal to 32M */
1490 dev_warn(adev->dev, "gart size (%d) too small\n",
1492 amdgpu_gart_size = -1;
1495 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1496 /* gtt size must be greater than or equal to 32M */
1497 dev_warn(adev->dev, "gtt size (%d) too small\n",
1499 amdgpu_gtt_size = -1;
1502 /* valid range is between 4 and 9 inclusive */
1503 if (amdgpu_vm_fragment_size != -1 &&
1504 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1505 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1506 amdgpu_vm_fragment_size = -1;
1509 if (amdgpu_sched_hw_submission < 2) {
1510 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1511 amdgpu_sched_hw_submission);
1512 amdgpu_sched_hw_submission = 2;
1513 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1514 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1515 amdgpu_sched_hw_submission);
1516 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1519 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1520 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1521 amdgpu_reset_method = -1;
1524 amdgpu_device_check_smu_prv_buffer_size(adev);
1526 amdgpu_device_check_vm_size(adev);
1528 amdgpu_device_check_block_size(adev);
1530 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1536 * amdgpu_switcheroo_set_state - set switcheroo state
1538 * @pdev: pci dev pointer
1539 * @state: vga_switcheroo state
1541 * Callback for the switcheroo driver. Suspends or resumes
1542 * the asic before or after it is powered up using ACPI methods.
1544 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1545 enum vga_switcheroo_state state)
1547 struct drm_device *dev = pci_get_drvdata(pdev);
1550 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1553 if (state == VGA_SWITCHEROO_ON) {
1554 pr_info("switched on\n");
1555 /* don't suspend or resume card normally */
1556 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1558 pci_set_power_state(pdev, PCI_D0);
1559 amdgpu_device_load_pci_state(pdev);
1560 r = pci_enable_device(pdev);
1562 DRM_WARN("pci_enable_device failed (%d)\n", r);
1563 amdgpu_device_resume(dev, true);
1565 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1567 pr_info("switched off\n");
1568 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1569 amdgpu_device_suspend(dev, true);
1570 amdgpu_device_cache_pci_state(pdev);
1571 /* Shut down the device */
1572 pci_disable_device(pdev);
1573 pci_set_power_state(pdev, PCI_D3cold);
1574 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1579 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1581 * @pdev: pci dev pointer
1583 * Callback for the switcheroo driver. Check if the switcheroo
1584 * state can be changed.
1585 * Returns true if the state can be changed, false if not.
1587 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1589 struct drm_device *dev = pci_get_drvdata(pdev);
1592 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1593 * locking inversion with the driver load path. And the access here is
1594 * completely racy anyway. So don't bother with locking for now.
1596 return atomic_read(&dev->open_count) == 0;
1599 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1600 .set_gpu_state = amdgpu_switcheroo_set_state,
1602 .can_switch = amdgpu_switcheroo_can_switch,
1606 * amdgpu_device_ip_set_clockgating_state - set the CG state
1608 * @dev: amdgpu_device pointer
1609 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1610 * @state: clockgating state (gate or ungate)
1612 * Sets the requested clockgating state for all instances of
1613 * the hardware IP specified.
1614 * Returns the error code from the last instance.
1616 int amdgpu_device_ip_set_clockgating_state(void *dev,
1617 enum amd_ip_block_type block_type,
1618 enum amd_clockgating_state state)
1620 struct amdgpu_device *adev = dev;
1623 for (i = 0; i < adev->num_ip_blocks; i++) {
1624 if (!adev->ip_blocks[i].status.valid)
1626 if (adev->ip_blocks[i].version->type != block_type)
1628 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1630 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1631 (void *)adev, state);
1633 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1634 adev->ip_blocks[i].version->funcs->name, r);
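/*
 * A hypothetical caller, enabling clockgating for all GFX instances:
 *
 *	int r = amdgpu_device_ip_set_clockgating_state(adev,
 *						       AMD_IP_BLOCK_TYPE_GFX,
 *						       AMD_CG_STATE_GATE);
 *	if (r)
 *		dev_warn(adev->dev, "GFX clockgating failed (%d)\n", r);
 */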
1640 * amdgpu_device_ip_set_powergating_state - set the PG state
1642 * @dev: amdgpu_device pointer
1643 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1644 * @state: powergating state (gate or ungate)
1646 * Sets the requested powergating state for all instances of
1647 * the hardware IP specified.
1648 * Returns the error code from the last instance.
1650 int amdgpu_device_ip_set_powergating_state(void *dev,
1651 enum amd_ip_block_type block_type,
1652 enum amd_powergating_state state)
1654 struct amdgpu_device *adev = dev;
1657 for (i = 0; i < adev->num_ip_blocks; i++) {
1658 if (!adev->ip_blocks[i].status.valid)
1660 if (adev->ip_blocks[i].version->type != block_type)
1662 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1664 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1665 (void *)adev, state);
1667 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1668 adev->ip_blocks[i].version->funcs->name, r);
1674 * amdgpu_device_ip_get_clockgating_state - get the CG state
1676 * @adev: amdgpu_device pointer
1677 * @flags: clockgating feature flags
1679 * Walks the list of IPs on the device and updates the clockgating
1680 * flags for each IP.
1681 * Updates @flags with the feature flags for each hardware IP where
1682 * clockgating is enabled.
1684 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1689 for (i = 0; i < adev->num_ip_blocks; i++) {
1690 if (!adev->ip_blocks[i].status.valid)
1692 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1693 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1698 * amdgpu_device_ip_wait_for_idle - wait for idle
1700 * @adev: amdgpu_device pointer
1701 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1703 * Waits for the requested hardware IP to be idle.
1704 * Returns 0 for success or a negative error code on failure.
1706 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1707 enum amd_ip_block_type block_type)
1711 for (i = 0; i < adev->num_ip_blocks; i++) {
1712 if (!adev->ip_blocks[i].status.valid)
1714 if (adev->ip_blocks[i].version->type == block_type) {
1715 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1726 * amdgpu_device_ip_is_idle - is the hardware IP idle
1728 * @adev: amdgpu_device pointer
1729 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1731 * Check if the hardware IP is idle or not.
1732 * Returns true if the IP is idle, false if not.
1734 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1735 enum amd_ip_block_type block_type)
1739 for (i = 0; i < adev->num_ip_blocks; i++) {
1740 if (!adev->ip_blocks[i].status.valid)
1742 if (adev->ip_blocks[i].version->type == block_type)
1743 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1750 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1752 * @adev: amdgpu_device pointer
1753 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1755 * Returns a pointer to the hardware IP block structure
1756 * if it exists for the asic, otherwise NULL.
1758 struct amdgpu_ip_block *
1759 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1760 enum amd_ip_block_type type)
1764 for (i = 0; i < adev->num_ip_blocks; i++)
1765 if (adev->ip_blocks[i].version->type == type)
1766 return &adev->ip_blocks[i];
1772 * amdgpu_device_ip_block_version_cmp
1774 * @adev: amdgpu_device pointer
1775 * @type: enum amd_ip_block_type
1776 * @major: major version
1777 * @minor: minor version
1779 * Returns 0 if the IP block version is equal or greater,
1780 * 1 if it is smaller or the ip_block doesn't exist.
1782 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1783 enum amd_ip_block_type type,
1784 u32 major, u32 minor)
1786 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1788 if (ip_block && ((ip_block->version->major > major) ||
1789 ((ip_block->version->major == major) &&
1790 (ip_block->version->minor >= minor))))
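/*
 * A sketch of the intended use; note the inverted convention documented
 * above, where 0 means "the IP block is at least this version":
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						8, 0)) {
 *		// GFX IP is v8.0 or newer
 *	}
 */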
1797 * amdgpu_device_ip_block_add
1799 * @adev: amdgpu_device pointer
1800 * @ip_block_version: pointer to the IP to add
1802 * Adds the IP block driver information to the collection of IPs
1805 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1806 const struct amdgpu_ip_block_version *ip_block_version)
1808 if (!ip_block_version)
1811 switch (ip_block_version->type) {
1812 case AMD_IP_BLOCK_TYPE_VCN:
1813 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1816 case AMD_IP_BLOCK_TYPE_JPEG:
1817 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1824 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1825 ip_block_version->funcs->name);
1827 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1833 * amdgpu_device_enable_virtual_display - enable virtual display feature
1835 * @adev: amdgpu_device pointer
1837 * Enables the virtual display feature if the user has enabled it via
1838 * the module parameter virtual_display. This feature provides a virtual
1839 * display hardware on headless boards or in virtualized environments.
1840 * This function parses and validates the configuration string specified by
1841 * the user and applies the virtual display configuration (number of
1842 * virtual connectors, crtcs, etc.) specified.
1844 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1846 adev->enable_virtual_display = false;
1848 if (amdgpu_virtual_display) {
1849 const char *pci_address_name = pci_name(adev->pdev);
1850 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1852 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1853 pciaddstr_tmp = pciaddstr;
1854 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1855 pciaddname = strsep(&pciaddname_tmp, ",");
1856 if (!strcmp("all", pciaddname)
1857 || !strcmp(pci_address_name, pciaddname)) {
1861 adev->enable_virtual_display = true;
1864 res = kstrtol(pciaddname_tmp, 10,
1872 adev->mode_info.num_crtc = num_crtc;
1874 adev->mode_info.num_crtc = 1;
1880 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1881 amdgpu_virtual_display, pci_address_name,
1882 adev->enable_virtual_display, adev->mode_info.num_crtc);
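/*
 * The string parsed above is a semicolon-separated list of PCI addresses,
 * each optionally followed by a comma and a crtc count; the addresses and
 * count below are illustrative only:
 *
 *	modprobe amdgpu virtual_display="0000:04:00.0,2;0000:05:00.0"
 *
 * or virtual_display=all to enable it on every device.
 */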
1888 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1890 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1891 adev->mode_info.num_crtc = 1;
1892 adev->enable_virtual_display = true;
1893 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1894 adev->enable_virtual_display, adev->mode_info.num_crtc);
1899 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1901 * @adev: amdgpu_device pointer
1903 * Parses the asic configuration parameters specified in the gpu info
1904 * firmware and makes them available to the driver for use in configuring
1906 * Returns 0 on success, -EINVAL on failure.
1908 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1910 const char *chip_name;
1913 const struct gpu_info_firmware_header_v1_0 *hdr;
1915 adev->firmware.gpu_info_fw = NULL;
1917 if (adev->mman.discovery_bin) {
1919 * FIXME: The bounding box is still needed by Navi12, so
1920 * temporarily read it from gpu_info firmware. Should be dropped
1921 * when DAL no longer needs it.
1923 if (adev->asic_type != CHIP_NAVI12)
1927 switch (adev->asic_type) {
1931 chip_name = "vega10";
1934 chip_name = "vega12";
1937 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1938 chip_name = "raven2";
1939 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1940 chip_name = "picasso";
1942 chip_name = "raven";
1945 chip_name = "arcturus";
1948 chip_name = "navi12";
1952 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1953 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
1956 "Failed to get gpu_info firmware \"%s\"\n",
1961 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1962 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1964 switch (hdr->version_major) {
1967 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1968 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1969 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1972 * Should be dropped when DAL no longer needs it.
1974 if (adev->asic_type == CHIP_NAVI12)
1975 goto parse_soc_bounding_box;
1977 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1978 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1979 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1980 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1981 adev->gfx.config.max_texture_channel_caches =
1982 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1983 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1984 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1985 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1986 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1987 adev->gfx.config.double_offchip_lds_buf =
1988 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1989 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1990 adev->gfx.cu_info.max_waves_per_simd =
1991 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1992 adev->gfx.cu_info.max_scratch_slots_per_cu =
1993 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1994 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1995 if (hdr->version_minor >= 1) {
1996 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1997 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1998 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1999 adev->gfx.config.num_sc_per_sh =
2000 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2001 adev->gfx.config.num_packer_per_sc =
2002 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2005 parse_soc_bounding_box:
2007 * soc bounding box info is not integrated into the discovery table,
2008 * so when needed it always has to be parsed from the gpu info firmware.
2010 if (hdr->version_minor == 2) {
2011 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2012 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2013 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2014 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2020 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2029 * amdgpu_device_ip_early_init - run early init for hardware IPs
2031 * @adev: amdgpu_device pointer
2033 * Early initialization pass for hardware IPs. The hardware IPs that make
2034 * up each asic are discovered and each IP's early_init callback is run. This
2035 * is the first stage in initializing the asic.
2036 * Returns 0 on success, negative error code on failure.
2038 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2040 struct drm_device *dev = adev_to_drm(adev);
2041 struct pci_dev *parent;
2045 amdgpu_device_enable_virtual_display(adev);
2047 if (amdgpu_sriov_vf(adev)) {
2048 r = amdgpu_virt_request_full_gpu(adev, true);
2053 switch (adev->asic_type) {
2054 #ifdef CONFIG_DRM_AMDGPU_SI
2060 adev->family = AMDGPU_FAMILY_SI;
2061 r = si_set_ip_blocks(adev);
2066 #ifdef CONFIG_DRM_AMDGPU_CIK
2072 if (adev->flags & AMD_IS_APU)
2073 adev->family = AMDGPU_FAMILY_KV;
2075 adev->family = AMDGPU_FAMILY_CI;
2077 r = cik_set_ip_blocks(adev);
2085 case CHIP_POLARIS10:
2086 case CHIP_POLARIS11:
2087 case CHIP_POLARIS12:
2091 if (adev->flags & AMD_IS_APU)
2092 adev->family = AMDGPU_FAMILY_CZ;
2094 adev->family = AMDGPU_FAMILY_VI;
2096 r = vi_set_ip_blocks(adev);
2101 r = amdgpu_discovery_set_ip_blocks(adev);
2107 if (amdgpu_has_atpx() &&
2108 (amdgpu_is_atpx_hybrid() ||
2109 amdgpu_has_atpx_dgpu_power_cntl()) &&
2110 ((adev->flags & AMD_IS_APU) == 0) &&
2111 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2112 adev->flags |= AMD_IS_PX;
2114 if (!(adev->flags & AMD_IS_APU)) {
2115 parent = pci_upstream_bridge(adev->pdev);
2116 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2120 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2121 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2122 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2123 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2124 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2127 for (i = 0; i < adev->num_ip_blocks; i++) {
2128 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2129 DRM_WARN("disabled ip block: %d <%s>\n",
2130 i, adev->ip_blocks[i].version->funcs->name);
2131 adev->ip_blocks[i].status.valid = false;
2133 if (adev->ip_blocks[i].version->funcs->early_init) {
2134 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2136 adev->ip_blocks[i].status.valid = false;
2138 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2139 adev->ip_blocks[i].version->funcs->name, r);
2142 adev->ip_blocks[i].status.valid = true;
2145 adev->ip_blocks[i].status.valid = true;
2148 /* get the vbios after the asic_funcs are set up */
2149 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2150 r = amdgpu_device_parse_gpu_info_fw(adev);
2155 if (amdgpu_device_read_bios(adev)) {
2156 if (!amdgpu_get_bios(adev))
2159 r = amdgpu_atombios_init(adev);
2161 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2162 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2167 /* get pf2vf msg info at its earliest time */
2168 if (amdgpu_sriov_vf(adev))
2169 amdgpu_virt_init_data_exchange(adev);
2176 amdgpu_amdkfd_device_probe(adev);
2177 adev->cg_flags &= amdgpu_cg_mask;
2178 adev->pg_flags &= amdgpu_pg_mask;
2183 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2187 for (i = 0; i < adev->num_ip_blocks; i++) {
2188 if (!adev->ip_blocks[i].status.sw)
2190 if (adev->ip_blocks[i].status.hw)
2192 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2193 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2194 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2195 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2197 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2198 adev->ip_blocks[i].version->funcs->name, r);
2201 adev->ip_blocks[i].status.hw = true;
2208 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2212 for (i = 0; i < adev->num_ip_blocks; i++) {
2213 if (!adev->ip_blocks[i].status.sw)
2215 if (adev->ip_blocks[i].status.hw)
2217 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2219 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2220 adev->ip_blocks[i].version->funcs->name, r);
2223 adev->ip_blocks[i].status.hw = true;
2229 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2233 uint32_t smu_version;
2235 if (adev->asic_type >= CHIP_VEGA10) {
2236 for (i = 0; i < adev->num_ip_blocks; i++) {
2237 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2240 if (!adev->ip_blocks[i].status.sw)
2243 /* no need to do the fw loading again if already done */
2244 if (adev->ip_blocks[i].status.hw)
2247 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2248 r = adev->ip_blocks[i].version->funcs->resume(adev);
2250 DRM_ERROR("resume of IP block <%s> failed %d\n",
2251 adev->ip_blocks[i].version->funcs->name, r);
2255 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2257 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2258 adev->ip_blocks[i].version->funcs->name, r);
2263 adev->ip_blocks[i].status.hw = true;
2268 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2269 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2274 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2279 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2280 struct amdgpu_ring *ring = adev->rings[i];
2282 /* No need to set up the GPU scheduler for rings that don't need it */
2283 if (!ring || ring->no_scheduler)
2286 switch (ring->funcs->type) {
2287 case AMDGPU_RING_TYPE_GFX:
2288 timeout = adev->gfx_timeout;
2290 case AMDGPU_RING_TYPE_COMPUTE:
2291 timeout = adev->compute_timeout;
2293 case AMDGPU_RING_TYPE_SDMA:
2294 timeout = adev->sdma_timeout;
2297 timeout = adev->video_timeout;
2301 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2302 ring->num_hw_submission, 0,
2303 timeout, adev->reset_domain->wq,
2304 ring->sched_score, ring->name,
2307 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2313 amdgpu_xcp_update_partition_sched_list(adev);
2320 * amdgpu_device_ip_init - run init for hardware IPs
2322 * @adev: amdgpu_device pointer
2324 * Main initialization pass for hardware IPs. The list of all the hardware
2325 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2326 * are run. sw_init initializes the software state associated with each IP
2327 * and hw_init initializes the hardware associated with each IP.
2328 * Returns 0 on success, negative error code on failure.
2330 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2334 r = amdgpu_ras_init(adev);
2338 for (i = 0; i < adev->num_ip_blocks; i++) {
2339 if (!adev->ip_blocks[i].status.valid)
2341 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2343 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2344 adev->ip_blocks[i].version->funcs->name, r);
2347 adev->ip_blocks[i].status.sw = true;
2349 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2350 /* need to do common hw init early so everything is set up for gmc */
2351 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2353 DRM_ERROR("hw_init %d failed %d\n", i, r);
2356 adev->ip_blocks[i].status.hw = true;
2357 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2358 /* need to do gmc hw init early so we can allocate gpu mem */
2359 /* Try to reserve bad pages early */
2360 if (amdgpu_sriov_vf(adev))
2361 amdgpu_virt_exchange_data(adev);
2363 r = amdgpu_device_mem_scratch_init(adev);
2365 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2368 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2370 DRM_ERROR("hw_init %d failed %d\n", i, r);
2373 r = amdgpu_device_wb_init(adev);
2375 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2378 adev->ip_blocks[i].status.hw = true;
2380 /* right after GMC hw init, we create CSA */
2381 if (adev->gfx.mcbp) {
2382 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2383 AMDGPU_GEM_DOMAIN_VRAM |
2384 AMDGPU_GEM_DOMAIN_GTT,
2387 DRM_ERROR("allocate CSA failed %d\n", r);
2394 if (amdgpu_sriov_vf(adev))
2395 amdgpu_virt_init_data_exchange(adev);
2397 r = amdgpu_ib_pool_init(adev);
2399 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2400 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2404 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2408 r = amdgpu_device_ip_hw_init_phase1(adev);
2412 r = amdgpu_device_fw_loading(adev);
2416 r = amdgpu_device_ip_hw_init_phase2(adev);
2421 * retired pages will be loaded from eeprom and reserved here;
2422 * this should be called after amdgpu_device_ip_hw_init_phase2, since
2423 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2424 * functional for I2C communication, which is only true at this point.
2426 * amdgpu_ras_recovery_init may fail, but the caller only cares about
2427 * failures caused by a bad GPU state, and stops the amdgpu init process
2428 * accordingly. For other failure cases, it still releases all the
2429 * resources and prints an error message rather than returning a
2430 * negative value to the caller.
2432 * Note: theoretically, this should be called before all vram allocations
2433 * to protect retired pages from being reused.
2435 r = amdgpu_ras_recovery_init(adev);
2440 * In case of XGMI, grab an extra reference to the reset domain for this device.
2442 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2443 if (amdgpu_xgmi_add_device(adev) == 0) {
2444 if (!amdgpu_sriov_vf(adev)) {
2445 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2447 if (WARN_ON(!hive)) {
2452 if (!hive->reset_domain ||
2453 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2455 amdgpu_put_xgmi_hive(hive);
2459 /* Drop the early temporary reset domain we created for the device */
2460 amdgpu_reset_put_reset_domain(adev->reset_domain);
2461 adev->reset_domain = hive->reset_domain;
2462 amdgpu_put_xgmi_hive(hive);
2467 r = amdgpu_device_init_schedulers(adev);
2471 /* Don't init kfd if the whole hive needs to be reset during init */
2472 if (!adev->gmc.xgmi.pending_reset) {
2473 kgd2kfd_init_zone_device(adev);
2474 amdgpu_amdkfd_device_init(adev);
2477 amdgpu_fru_get_product_info(adev);
2485 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2487 * @adev: amdgpu_device pointer
2489 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2490 * this function before a GPU reset. If the value is retained after a
2491 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2493 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2495 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
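/*
 * The magic is simply a snapshot of the first AMDGPU_RESET_MAGIC_NUM
 * bytes of the GART table in VRAM (gart.ptr being its CPU mapping);
 * amdgpu_device_check_vram_lost() later memcmp()s the same bytes to
 * detect whether the VRAM contents survived the reset.
 */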
2499 * amdgpu_device_check_vram_lost - check if vram is valid
2501 * @adev: amdgpu_device pointer
2503 * Checks the reset magic value written to the gart pointer in VRAM.
2504 * The driver calls this after a GPU reset to see if the contents of
2505 * VRAM have been lost or not.
2506 * Returns true if vram is lost, false if not.
2508 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2510 if (memcmp(adev->gart.ptr, adev->reset_magic,
2511 AMDGPU_RESET_MAGIC_NUM))
2514 if (!amdgpu_in_reset(adev))
2518 * For all ASICs with baco/mode1 reset, the VRAM is
2519 * always assumed to be lost.
2521 switch (amdgpu_asic_reset_method(adev)) {
2522 case AMD_RESET_METHOD_BACO:
2523 case AMD_RESET_METHOD_MODE1:
2531 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2533 * @adev: amdgpu_device pointer
2534 * @state: clockgating state (gate or ungate)
2536 * The list of all the hardware IPs that make up the asic is walked and the
2537 * set_clockgating_state callbacks are run.
2538 * On late init this pass enables clockgating for hardware IPs;
2539 * on fini or suspend it disables clockgating for hardware IPs.
2540 * Returns 0 on success, negative error code on failure.
2543 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2544 enum amd_clockgating_state state)
2548 if (amdgpu_emu_mode == 1)
2551 for (j = 0; j < adev->num_ip_blocks; j++) {
2552 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
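/*
 * Gating walks the IP list front to back, while ungating walks it back
 * to front, so blocks are ungated in the reverse order they were gated
 * (e.g. with three blocks: gate visits 0,1,2 and ungate visits 2,1,0).
 */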
2553 if (!adev->ip_blocks[i].status.late_initialized)
2555 /* skip CG for GFX, SDMA on S0ix */
2556 if (adev->in_s0ix &&
2557 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2558 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2560 /* skip CG for VCE/UVD, it's handled specially */
2561 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2562 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2563 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2564 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2565 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2566 /* enable clockgating to save power */
2567 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2570 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2571 adev->ip_blocks[i].version->funcs->name, r);
2580 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2581 enum amd_powergating_state state)
2585 if (amdgpu_emu_mode == 1)
2588 for (j = 0; j < adev->num_ip_blocks; j++) {
2589 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2590 if (!adev->ip_blocks[i].status.late_initialized)
2592 /* skip PG for GFX, SDMA on S0ix */
2593 if (adev->in_s0ix &&
2594 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2595 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2597 /* skip PG for VCE/UVD, it's handled specially */
2598 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2599 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2600 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2601 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2602 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2603 /* enable powergating to save power */
2604 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2607 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2608 adev->ip_blocks[i].version->funcs->name, r);
2616 static int amdgpu_device_enable_mgpu_fan_boost(void)
2618 struct amdgpu_gpu_instance *gpu_ins;
2619 struct amdgpu_device *adev;
2622 mutex_lock(&mgpu_info.mutex);
2625 * MGPU fan boost feature should be enabled
2626 * only when there are two or more dGPUs in
2629 if (mgpu_info.num_dgpu < 2)
2632 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2633 gpu_ins = &(mgpu_info.gpu_ins[i]);
2634 adev = gpu_ins->adev;
2635 if (!(adev->flags & AMD_IS_APU) &&
2636 !gpu_ins->mgpu_fan_enabled) {
2637 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2641 gpu_ins->mgpu_fan_enabled = 1;
2646 mutex_unlock(&mgpu_info.mutex);
2652 * amdgpu_device_ip_late_init - run late init for hardware IPs
2654 * @adev: amdgpu_device pointer
2656 * Late initialization pass for hardware IPs. The list of all the hardware
2657 * IPs that make up the asic is walked and the late_init callbacks are run.
2658 * late_init covers any special initialization that an IP requires
2659 * after all of them have been initialized, or something that needs to happen
2660 * late in the init process.
2661 * Returns 0 on success, negative error code on failure.
2663 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2665 struct amdgpu_gpu_instance *gpu_instance;
2668 for (i = 0; i < adev->num_ip_blocks; i++) {
2669 if (!adev->ip_blocks[i].status.hw)
2671 if (adev->ip_blocks[i].version->funcs->late_init) {
2672 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2674 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2675 adev->ip_blocks[i].version->funcs->name, r);
2679 adev->ip_blocks[i].status.late_initialized = true;
2682 r = amdgpu_ras_late_init(adev);
2684 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2688 amdgpu_ras_set_error_query_ready(adev, true);
2690 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2691 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2693 amdgpu_device_fill_reset_magic(adev);
2695 r = amdgpu_device_enable_mgpu_fan_boost();
2697 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2699 /* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
2700 if (amdgpu_passthrough(adev) &&
2701 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2702 adev->asic_type == CHIP_ALDEBARAN))
2703 amdgpu_dpm_handle_passthrough_sbr(adev, true);
2705 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2706 mutex_lock(&mgpu_info.mutex);
2709 * Reset the device p-state to low, as it was booted with high.
2711 * This should be performed only after all devices from the same
2712 * hive have been initialized.
2714 * However, the number of devices in a hive is not known in advance;
2715 * it is counted one by one as the devices initialize.
2717 * So we wait until all XGMI-interlinked devices have initialized.
2718 * This may add some delay, as those devices may come from
2719 * different hives. But that should be OK.
2721 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2722 for (i = 0; i < mgpu_info.num_gpu; i++) {
2723 gpu_instance = &(mgpu_info.gpu_ins[i]);
2724 if (gpu_instance->adev->flags & AMD_IS_APU)
2727 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2728 AMDGPU_XGMI_PSTATE_MIN);
2730 DRM_ERROR("pstate setting failed (%d).\n", r);
2736 mutex_unlock(&mgpu_info.mutex);
2743 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2745 * @adev: amdgpu_device pointer
2747 * For ASICs that need to disable the SMC first
2749 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2753 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2756 for (i = 0; i < adev->num_ip_blocks; i++) {
2757 if (!adev->ip_blocks[i].status.hw)
2759 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2760 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2761 /* XXX handle errors */
2763 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2764 adev->ip_blocks[i].version->funcs->name, r);
2766 adev->ip_blocks[i].status.hw = false;
2772 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2776 for (i = 0; i < adev->num_ip_blocks; i++) {
2777 if (!adev->ip_blocks[i].version->funcs->early_fini)
2780 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2782 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2783 adev->ip_blocks[i].version->funcs->name, r);
2787 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2788 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2790 amdgpu_amdkfd_suspend(adev, false);
2792 /* Workaround for ASICs that need to disable the SMC first */
2793 amdgpu_device_smu_fini_early(adev);
2795 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2796 if (!adev->ip_blocks[i].status.hw)
2799 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2800 /* XXX handle errors */
2802 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2803 adev->ip_blocks[i].version->funcs->name, r);
2806 adev->ip_blocks[i].status.hw = false;
2809 if (amdgpu_sriov_vf(adev)) {
2810 if (amdgpu_virt_release_full_gpu(adev, false))
2811 DRM_ERROR("failed to release exclusive mode on fini\n");
2818 * amdgpu_device_ip_fini - run fini for hardware IPs
2820 * @adev: amdgpu_device pointer
2822 * Main teardown pass for hardware IPs. The list of all the hardware
2823 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2824 * are run. hw_fini tears down the hardware associated with each IP
2825 * and sw_fini tears down any software state associated with each IP.
2826 * Returns 0 on success, negative error code on failure.
2828 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2832 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2833 amdgpu_virt_release_ras_err_handler_data(adev);
2835 if (adev->gmc.xgmi.num_physical_nodes > 1)
2836 amdgpu_xgmi_remove_device(adev);
2838 amdgpu_amdkfd_device_fini_sw(adev);
2840 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2841 if (!adev->ip_blocks[i].status.sw)
2844 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2845 amdgpu_ucode_free_bo(adev);
2846 amdgpu_free_static_csa(&adev->virt.csa_obj);
2847 amdgpu_device_wb_fini(adev);
2848 amdgpu_device_mem_scratch_fini(adev);
2849 amdgpu_ib_pool_fini(adev);
2852 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2853 /* XXX handle errors */
2855 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2856 adev->ip_blocks[i].version->funcs->name, r);
2858 adev->ip_blocks[i].status.sw = false;
2859 adev->ip_blocks[i].status.valid = false;
2862 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2863 if (!adev->ip_blocks[i].status.late_initialized)
2865 if (adev->ip_blocks[i].version->funcs->late_fini)
2866 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2867 adev->ip_blocks[i].status.late_initialized = false;
2870 amdgpu_ras_fini(adev);
2876 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2878 * @work: work_struct.
2880 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2882 struct amdgpu_device *adev =
2883 container_of(work, struct amdgpu_device, delayed_init_work.work);
2886 r = amdgpu_ib_ring_tests(adev);
2888 DRM_ERROR("ib ring test failed (%d).\n", r);
2891 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2893 struct amdgpu_device *adev =
2894 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2896 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2897 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
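/*
 * By the time this delayed work runs, the GFXOFF request count should
 * have dropped back to zero; ask the SMU to powergate GFX and record
 * that GFXOFF is now in effect.
 */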
2899 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2900 adev->gfx.gfx_off_state = true;
2904 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2906 * @adev: amdgpu_device pointer
2908 * Main suspend function for hardware IPs. The list of all the hardware
2909 * IPs that make up the asic is walked, clockgating is disabled and the
2910 * suspend callbacks are run. suspend puts the hardware and software state
2911 * in each IP into a state suitable for suspend.
2912 * Returns 0 on success, negative error code on failure.
2914 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2918 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2919 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2922 * Per the PMFW team's suggestion, the driver needs to disable the
2923 * gfxoff and df cstate features for gpu reset (e.g. Mode1Reset)
2924 * scenarios. Add the missing df cstate disablement here.
2926 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2927 dev_warn(adev->dev, "Failed to disallow df cstate");
2929 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2930 if (!adev->ip_blocks[i].status.valid)
2933 /* displays are handled separately */
2934 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2937 /* XXX handle errors */
2938 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2939 /* XXX handle errors */
2941 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2942 adev->ip_blocks[i].version->funcs->name, r);
2946 adev->ip_blocks[i].status.hw = false;
2953 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2955 * @adev: amdgpu_device pointer
2957 * Main suspend function for hardware IPs. The list of all the hardware
2958 * IPs that make up the asic is walked, clockgating is disabled and the
2959 * suspend callbacks are run. suspend puts the hardware and software state
2960 * in each IP into a state suitable for suspend.
2961 * Returns 0 on success, negative error code on failure.
2963 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2968 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
2970 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2971 if (!adev->ip_blocks[i].status.valid)
2973 /* displays are handled in phase1 */
2974 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2976 /* PSP lost connection when err_event_athub occurs */
2977 if (amdgpu_ras_intr_triggered() &&
2978 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2979 adev->ip_blocks[i].status.hw = false;
2983 /* skip unnecessary suspend if we have not initialized them yet */
2984 if (adev->gmc.xgmi.pending_reset &&
2985 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2986 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2987 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2988 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2989 adev->ip_blocks[i].status.hw = false;
2993 /* skip suspend of gfx/mes and psp for S0ix
2994 * gfx is in gfxoff state, so on resume it will exit gfxoff just
2995 * like at runtime. PSP is also part of the always-on hardware
2996 * so no need to suspend it.
2998 if (adev->in_s0ix &&
2999 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3000 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3001 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3004 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3005 if (adev->in_s0ix &&
3006 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3007 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3010 * During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3011 * These live in the TMR and are expected to be reused by PSP-TOS to reload
3012 * from that location; RLC autoload is also loaded automatically
3013 * from there based on the PMFW -> PSP message during the re-init sequence.
3014 * Therefore, psp suspend & resume should be skipped to avoid destroying
3015 * the TMR and reloading the FWs again on IMU-enabled APU ASICs.
3017 if (amdgpu_in_reset(adev) &&
3018 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3019 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3022 /* XXX handle errors */
3023 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3024 /* XXX handle errors */
3026 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3027 adev->ip_blocks[i].version->funcs->name, r);
3029 adev->ip_blocks[i].status.hw = false;
3030 /* handle putting the SMC in the appropriate state */
3031 if (!amdgpu_sriov_vf(adev)) {
3032 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3033 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3035 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3036 adev->mp1_state, r);
3047 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3049 * @adev: amdgpu_device pointer
3051 * Main suspend function for hardware IPs. The list of all the hardware
3052 * IPs that make up the asic is walked, clockgating is disabled and the
3053 * suspend callbacks are run. suspend puts the hardware and software state
3054 * in each IP into a state suitable for suspend.
3055 * Returns 0 on success, negative error code on failure.
3057 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3061 if (amdgpu_sriov_vf(adev)) {
3062 amdgpu_virt_fini_data_exchange(adev);
3063 amdgpu_virt_request_full_gpu(adev, false);
3066 r = amdgpu_device_ip_suspend_phase1(adev);
3069 r = amdgpu_device_ip_suspend_phase2(adev);
3071 if (amdgpu_sriov_vf(adev))
3072 amdgpu_virt_release_full_gpu(adev, false);
3077 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3081 static enum amd_ip_block_type ip_order[] = {
3082 AMD_IP_BLOCK_TYPE_COMMON,
3083 AMD_IP_BLOCK_TYPE_GMC,
3084 AMD_IP_BLOCK_TYPE_PSP,
3085 AMD_IP_BLOCK_TYPE_IH,
3088 for (i = 0; i < adev->num_ip_blocks; i++) {
3090 struct amdgpu_ip_block *block;
3092 block = &adev->ip_blocks[i];
3093 block->status.hw = false;
3095 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3097 if (block->version->type != ip_order[j] ||
3098 !block->status.valid)
3101 r = block->version->funcs->hw_init(adev);
3102 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3105 block->status.hw = true;
3112 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3116 static enum amd_ip_block_type ip_order[] = {
3117 AMD_IP_BLOCK_TYPE_SMC,
3118 AMD_IP_BLOCK_TYPE_DCE,
3119 AMD_IP_BLOCK_TYPE_GFX,
3120 AMD_IP_BLOCK_TYPE_SDMA,
3121 AMD_IP_BLOCK_TYPE_MES,
3122 AMD_IP_BLOCK_TYPE_UVD,
3123 AMD_IP_BLOCK_TYPE_VCE,
3124 AMD_IP_BLOCK_TYPE_VCN,
3125 AMD_IP_BLOCK_TYPE_JPEG
3128 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3130 struct amdgpu_ip_block *block;
3132 for (j = 0; j < adev->num_ip_blocks; j++) {
3133 block = &adev->ip_blocks[j];
3135 if (block->version->type != ip_order[i] ||
3136 !block->status.valid ||
3140 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3141 r = block->version->funcs->resume(adev);
3143 r = block->version->funcs->hw_init(adev);
3145 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3148 block->status.hw = true;
3156 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3158 * @adev: amdgpu_device pointer
3160 * First resume function for hardware IPs. The list of all the hardware
3161 * IPs that make up the asic is walked and the resume callbacks are run for
3162 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3163 * after a suspend and updates the software state as necessary. This
3164 * function is also used for restoring the GPU after a GPU reset.
3165 * Returns 0 on success, negative error code on failure.
3167 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3171 for (i = 0; i < adev->num_ip_blocks; i++) {
3172 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3174 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3175 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3176 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3177 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3179 r = adev->ip_blocks[i].version->funcs->resume(adev);
3181 DRM_ERROR("resume of IP block <%s> failed %d\n",
3182 adev->ip_blocks[i].version->funcs->name, r);
3185 adev->ip_blocks[i].status.hw = true;
3193 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3195 * @adev: amdgpu_device pointer
3197 * Second resume function for hardware IPs. The list of all the hardware
3198 * IPs that make up the asic is walked and the resume callbacks are run for
3199 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3200 * functional state after a suspend and updates the software state as
3201 * necessary. This function is also used for restoring the GPU after a GPU reset.
3203 * Returns 0 on success, negative error code on failure.
3205 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3209 for (i = 0; i < adev->num_ip_blocks; i++) {
3210 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3212 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3213 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3214 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3215 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3217 r = adev->ip_blocks[i].version->funcs->resume(adev);
3219 DRM_ERROR("resume of IP block <%s> failed %d\n",
3220 adev->ip_blocks[i].version->funcs->name, r);
3223 adev->ip_blocks[i].status.hw = true;
3230 * amdgpu_device_ip_resume - run resume for hardware IPs
3232 * @adev: amdgpu_device pointer
3234 * Main resume function for hardware IPs. The hardware IPs
3235 * are split into two resume functions because they are
3236 * also used in recovering from a GPU reset and some additional
3237 * steps need to be taken between them. In this case (S3/S4) they are run sequentially.
3239 * Returns 0 on success, negative error code on failure.
3241 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3245 r = amdgpu_device_ip_resume_phase1(adev);
3249 r = amdgpu_device_fw_loading(adev);
3253 r = amdgpu_device_ip_resume_phase2(adev);
3259 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3261 * @adev: amdgpu_device pointer
3263 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3265 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3267 if (amdgpu_sriov_vf(adev)) {
3268 if (adev->is_atom_fw) {
3269 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3270 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3272 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3273 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3276 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3277 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3282 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3284 * @asic_type: AMD asic type
3286 * Check if there is DC (new modesetting infrastructure) support for an asic.
3287 * Returns true if DC has support, false if not.
3289 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3291 switch (asic_type) {
3292 #ifdef CONFIG_DRM_AMDGPU_SI
3296 /* chips with no display hardware */
3298 #if defined(CONFIG_DRM_AMD_DC)
3304 * We have systems in the wild with these ASICs that require
3305 * LVDS and VGA support which is not supported with DC.
3307 * Fall back to the non-DC driver here by default so as not to
3308 * cause regressions.
3310 #if defined(CONFIG_DRM_AMD_DC_SI)
3311 return amdgpu_dc > 0;
3320 * We have systems in the wild with these ASICs that require
3321 * VGA support which is not supported with DC.
3323 * Fall back to the non-DC driver here by default so as not to
3324 * cause regressions.
3326 return amdgpu_dc > 0;
3328 return amdgpu_dc != 0;
3332 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3339 * amdgpu_device_has_dc_support - check if dc is supported
3341 * @adev: amdgpu_device pointer
3343 * Returns true for supported, false for not supported
3345 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3347 if (adev->enable_virtual_display ||
3348 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3351 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3354 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3356 struct amdgpu_device *adev =
3357 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3358 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3360 /* It's a bug to not have a hive within this function */
3365 * Use task barrier to synchronize all xgmi reset works across the
3366 * hive. task_barrier_enter and task_barrier_exit will block
3367 * until all the threads running the xgmi reset works reach
3368 * those points. task_barrier_full will do both blocks.
3370 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3372 task_barrier_enter(&hive->tb);
3373 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3375 if (adev->asic_reset_res)
3378 task_barrier_exit(&hive->tb);
3379 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3381 if (adev->asic_reset_res)
3384 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3385 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3386 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3389 task_barrier_full(&hive->tb);
3390 adev->asic_reset_res = amdgpu_asic_reset(adev);
3394 if (adev->asic_reset_res)
3395 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3396 adev->asic_reset_res, adev_to_drm(adev)->unique);
3397 amdgpu_put_xgmi_hive(hive);
3400 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3402 char *input = amdgpu_lockup_timeout;
3403 char *timeout_setting = NULL;
3409 * By default the timeout for non-compute jobs is 10000 ms
3410 * and 60000 ms for compute jobs.
3411 * In SR-IOV or passthrough mode, the timeout for compute
3412 * jobs is 60000 ms by default.
3414 adev->gfx_timeout = msecs_to_jiffies(10000);
3415 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3416 if (amdgpu_sriov_vf(adev))
3417 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3418 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3420 adev->compute_timeout = msecs_to_jiffies(60000);
3422 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3423 while ((timeout_setting = strsep(&input, ",")) &&
3424 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3425 ret = kstrtol(timeout_setting, 0, &timeout);
3432 } else if (timeout < 0) {
3433 timeout = MAX_SCHEDULE_TIMEOUT;
3434 dev_warn(adev->dev, "lockup timeout disabled");
3435 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3437 timeout = msecs_to_jiffies(timeout);
3442 adev->gfx_timeout = timeout;
3445 adev->compute_timeout = timeout;
3448 adev->sdma_timeout = timeout;
3451 adev->video_timeout = timeout;
3458 * There is only one value specified and
3459 * it should apply to all non-compute jobs.
3462 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3463 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3464 adev->compute_timeout = adev->gfx_timeout;
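/*
 * Example (a sketch, assuming the field order used in the switch above):
 * amdgpu.lockup_timeout=10000,60000,10000,10000 sets the gfx, compute,
 * sdma and video timeouts (in ms) individually, while a single value
 * such as amdgpu.lockup_timeout=5000 applies to all non-compute jobs.
 * A value of 0 keeps the default and a negative value disables the
 * timeout entirely.
 */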
3472 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3474 * @adev: amdgpu_device pointer
3476 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3478 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3480 struct iommu_domain *domain;
3482 domain = iommu_get_domain_for_dev(adev->dev);
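/*
 * An identity (passthrough) domain means DMA addresses equal physical
 * addresses, so system RAM is effectively direct mapped for the GPU;
 * the same holds when no IOMMU translation is present at all.
 */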
3483 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3484 adev->ram_is_direct_mapped = true;
3487 static const struct attribute *amdgpu_dev_attributes[] = {
3488 &dev_attr_pcie_replay_count.attr,
3492 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3494 if (amdgpu_mcbp == 1)
3495 adev->gfx.mcbp = true;
3496 else if (amdgpu_mcbp == 0)
3497 adev->gfx.mcbp = false;
3498 else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
3499 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
3500 adev->gfx.num_gfx_rings)
3501 adev->gfx.mcbp = true;
3503 if (amdgpu_sriov_vf(adev))
3504 adev->gfx.mcbp = true;
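/*
 * MCBP (mid-command-buffer preemption) is always enabled under SR-IOV,
 * presumably so the GPU can be preempted for hypervisor world switches
 * between VFs.
 */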
3507 DRM_INFO("MCBP is enabled\n");
3511 * amdgpu_device_init - initialize the driver
3513 * @adev: amdgpu_device pointer
3514 * @flags: driver flags
3516 * Initializes the driver info and hw (all asics).
3517 * Returns 0 for success or an error on failure.
3518 * Called at driver startup.
3520 int amdgpu_device_init(struct amdgpu_device *adev,
3523 struct drm_device *ddev = adev_to_drm(adev);
3524 struct pci_dev *pdev = adev->pdev;
3530 adev->shutdown = false;
3531 adev->flags = flags;
3533 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3534 adev->asic_type = amdgpu_force_asic_type;
3536 adev->asic_type = flags & AMD_ASIC_MASK;
3538 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3539 if (amdgpu_emu_mode == 1)
3540 adev->usec_timeout *= 10;
3541 adev->gmc.gart_size = 512 * 1024 * 1024;
3542 adev->accel_working = false;
3543 adev->num_rings = 0;
3544 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3545 adev->mman.buffer_funcs = NULL;
3546 adev->mman.buffer_funcs_ring = NULL;
3547 adev->vm_manager.vm_pte_funcs = NULL;
3548 adev->vm_manager.vm_pte_num_scheds = 0;
3549 adev->gmc.gmc_funcs = NULL;
3550 adev->harvest_ip_mask = 0x0;
3551 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3552 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3554 adev->smc_rreg = &amdgpu_invalid_rreg;
3555 adev->smc_wreg = &amdgpu_invalid_wreg;
3556 adev->pcie_rreg = &amdgpu_invalid_rreg;
3557 adev->pcie_wreg = &amdgpu_invalid_wreg;
3558 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3559 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3560 adev->pciep_rreg = &amdgpu_invalid_rreg;
3561 adev->pciep_wreg = &amdgpu_invalid_wreg;
3562 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3563 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3564 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3565 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3566 adev->didt_rreg = &amdgpu_invalid_rreg;
3567 adev->didt_wreg = &amdgpu_invalid_wreg;
3568 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3569 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3570 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3571 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3573 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3574 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3575 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3577 /* mutex initialization is all done here so we
3578 * can call these functions again without locking issues
3580 mutex_init(&adev->firmware.mutex);
3581 mutex_init(&adev->pm.mutex);
3582 mutex_init(&adev->gfx.gpu_clock_mutex);
3583 mutex_init(&adev->srbm_mutex);
3584 mutex_init(&adev->gfx.pipe_reserve_mutex);
3585 mutex_init(&adev->gfx.gfx_off_mutex);
3586 mutex_init(&adev->gfx.partition_mutex);
3587 mutex_init(&adev->grbm_idx_mutex);
3588 mutex_init(&adev->mn_lock);
3589 mutex_init(&adev->virt.vf_errors.lock);
3590 hash_init(adev->mn_hash);
3591 mutex_init(&adev->psp.mutex);
3592 mutex_init(&adev->notifier_lock);
3593 mutex_init(&adev->pm.stable_pstate_ctx_lock);
3594 mutex_init(&adev->benchmark_mutex);
3596 amdgpu_device_init_apu_flags(adev);
3598 r = amdgpu_device_check_arguments(adev);
3602 spin_lock_init(&adev->mmio_idx_lock);
3603 spin_lock_init(&adev->smc_idx_lock);
3604 spin_lock_init(&adev->pcie_idx_lock);
3605 spin_lock_init(&adev->uvd_ctx_idx_lock);
3606 spin_lock_init(&adev->didt_idx_lock);
3607 spin_lock_init(&adev->gc_cac_idx_lock);
3608 spin_lock_init(&adev->se_cac_idx_lock);
3609 spin_lock_init(&adev->audio_endpt_idx_lock);
3610 spin_lock_init(&adev->mm_stats.lock);
3612 INIT_LIST_HEAD(&adev->shadow_list);
3613 mutex_init(&adev->shadow_list_lock);
3615 INIT_LIST_HEAD(&adev->reset_list);
3617 INIT_LIST_HEAD(&adev->ras_list);
3619 INIT_DELAYED_WORK(&adev->delayed_init_work,
3620 amdgpu_device_delayed_init_work_handler);
3621 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3622 amdgpu_device_delay_enable_gfx_off);
3624 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3626 adev->gfx.gfx_off_req_count = 1;
3627 adev->gfx.gfx_off_residency = 0;
3628 adev->gfx.gfx_off_entrycount = 0;
3629 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3631 atomic_set(&adev->throttling_logging_enabled, 1);
3633 * If throttling continues, logging will be performed every minute
3634 * to avoid log flooding. "-1" is subtracted since the thermal
3635 * throttling interrupt comes every second. Thus, the total logging
3636 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3637 * for throttling interrupt) = 60 seconds.
3639 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3640 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3642 /* Registers mapping */
3643 /* TODO: block userspace mapping of io register */
3644 if (adev->asic_type >= CHIP_BONAIRE) {
3645 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3646 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3648 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3649 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3652 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3653 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3655 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3659 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3660 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
3663 * The reset domain needs to be present early, before the XGMI hive is
3664 * discovered (if any) and initialized, so the reset sem and in_gpu_reset
3665 * flag can be used early on during init and before any call to RREG32.
3667 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3668 if (!adev->reset_domain)
3671 /* detect hw virtualization here */
3672 amdgpu_detect_virtualization(adev);
3674 amdgpu_device_get_pcie_info(adev);
3676 r = amdgpu_device_get_job_timeout_settings(adev);
3678 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3682 /* early init functions */
3683 r = amdgpu_device_ip_early_init(adev);
3687 amdgpu_device_set_mcbp(adev);
3689 /* Get rid of things like offb */
3690 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3694 /* Enable TMZ based on IP_VERSION */
3695 amdgpu_gmc_tmz_set(adev);
3697 amdgpu_gmc_noretry_set(adev);
3698 /* Need to get xgmi info early to decide the reset behavior */
3699 if (adev->gmc.xgmi.supported) {
3700 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3705 /* enable PCIE atomic ops */
3706 if (amdgpu_sriov_vf(adev)) {
3707 if (adev->virt.fw_reserve.p_pf2vf)
3708 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3709 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3710 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3711 /* APUs with gfx9 onwards don't rely on PCIe atomics; their
3712 * internal path natively supports atomics, so set have_atomics_support to true.
3714 } else if ((adev->flags & AMD_IS_APU) &&
3715 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
3716 adev->have_atomics_support = true;
3718 adev->have_atomics_support =
3719 !pci_enable_atomic_ops_to_root(adev->pdev,
3720 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3721 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
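/*
 * pci_enable_atomic_ops_to_root() returns 0 only when every bridge up
 * to the root port supports the requested atomic completers, so the
 * negation yields true exactly when PCIe atomics are usable.
 */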
3724 if (!adev->have_atomics_support)
3725 dev_info(adev->dev, "PCIe atomic ops are not supported\n");
3727 /* doorbell bar mapping and doorbell index init */
3728 amdgpu_doorbell_init(adev);
3730 if (amdgpu_emu_mode == 1) {
3731 /* post the asic in emulation mode */
3732 emu_soc_asic_init(adev);
3733 goto fence_driver_init;
3736 amdgpu_reset_init(adev);
3738 /* detect if we have an SR-IOV vbios */
3740 amdgpu_device_detect_sriov_bios(adev);
3742 /* check if we need to reset the asic
3743 * E.g., driver was not cleanly unloaded previously, etc.
3745 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3746 if (adev->gmc.xgmi.num_physical_nodes) {
3747 dev_info(adev->dev, "Pending hive reset.\n");
3748 adev->gmc.xgmi.pending_reset = true;
3749 /* Only need to init the blocks necessary for the SMU to handle the reset */
3750 for (i = 0; i < adev->num_ip_blocks; i++) {
3751 if (!adev->ip_blocks[i].status.valid)
3753 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3754 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3755 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3756 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3757 DRM_DEBUG("IP %s disabled for hw_init.\n",
3758 adev->ip_blocks[i].version->funcs->name);
3759 adev->ip_blocks[i].status.hw = true;
3763 tmp = amdgpu_reset_method;
3764 /* It should do a default reset when loading or reloading the driver,
3765 * regardless of the module parameter reset_method.
3767 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3768 r = amdgpu_asic_reset(adev);
3769 amdgpu_reset_method = tmp;
3771 dev_err(adev->dev, "asic reset on init failed\n");
3777 /* Post card if necessary */
3778 if (amdgpu_device_need_post(adev)) {
3780 dev_err(adev->dev, "no vBIOS found\n");
3784 DRM_INFO("GPU posting now...\n");
3785 r = amdgpu_device_asic_init(adev);
3787 dev_err(adev->dev, "gpu post error!\n");
3793 if (adev->is_atom_fw) {
3794 /* Initialize clocks */
3795 r = amdgpu_atomfirmware_get_clock_info(adev);
3797 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3798 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3802 /* Initialize clocks */
3803 r = amdgpu_atombios_get_clock_info(adev);
3805 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3806 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3809 /* init i2c buses */
3810 if (!amdgpu_device_has_dc_support(adev))
3811 amdgpu_atombios_i2c_init(adev);
3817 r = amdgpu_fence_driver_sw_init(adev);
3819 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3820 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3824 /* init the mode config */
3825 drm_mode_config_init(adev_to_drm(adev));
3827 r = amdgpu_device_ip_init(adev);
3829 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3830 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3831 goto release_ras_con;
3834 amdgpu_fence_driver_hw_init(adev);
3837 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3838 adev->gfx.config.max_shader_engines,
3839 adev->gfx.config.max_sh_per_se,
3840 adev->gfx.config.max_cu_per_sh,
3841 adev->gfx.cu_info.number);
3843 adev->accel_working = true;
3845 amdgpu_vm_check_compute_bug(adev);
3847 /* Initialize the buffer migration limit. */
3848 if (amdgpu_moverate >= 0)
3849 max_MBps = amdgpu_moverate;
3851 max_MBps = 8; /* Allow 8 MB/s. */
3852 /* Get a log2 for easy divisions. */
3853 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
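/*
 * Storing ilog2(max_MBps) lets later accounting divide by the rate with
 * a shift: bytes >> log2_max_MBps approximates bytes / max_MBps
 * (exact when max_MBps is a power of two, since ilog2() rounds down).
 */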
3855 r = amdgpu_atombios_sysfs_init(adev);
3857 drm_err(&adev->ddev,
3858 "registering atombios sysfs failed (%d).\n", r);
3860 r = amdgpu_pm_sysfs_init(adev);
3862 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
3864 r = amdgpu_ucode_sysfs_init(adev);
3866 adev->ucode_sysfs_en = false;
3867 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3869 adev->ucode_sysfs_en = true;
3872 * Register the gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3873 * Otherwise the mgpu fan boost feature will be skipped because the
3874 * gpu instance count would be too low.
3876 amdgpu_register_gpu_instance(adev);
3878 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3879 * explicit gating rather than handling it automatically.
3881 if (!adev->gmc.xgmi.pending_reset) {
3882 r = amdgpu_device_ip_late_init(adev);
3884 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3885 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3886 goto release_ras_con;
3889 amdgpu_ras_resume(adev);
3890 queue_delayed_work(system_wq, &adev->delayed_init_work,
3891 msecs_to_jiffies(AMDGPU_RESUME_MS));
3894 if (amdgpu_sriov_vf(adev)) {
3895 amdgpu_virt_release_full_gpu(adev, true);
3896 flush_delayed_work(&adev->delayed_init_work);
3899 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3901 dev_err(adev->dev, "Could not create amdgpu device attr\n");
3903 amdgpu_fru_sysfs_init(adev);
3905 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3906 r = amdgpu_pmu_init(adev);
3908 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3910 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3911 if (amdgpu_device_cache_pci_state(adev->pdev))
3912 pci_restore_state(pdev);
3914 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3915 /* this will fail for cards that aren't VGA class devices; just ignore it */
3918 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3919 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3921 px = amdgpu_device_supports_px(ddev);
3923 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3924 apple_gmux_detect(NULL, NULL)))
3925 vga_switcheroo_register_client(adev->pdev,
3926 &amdgpu_switcheroo_ops, px);
3929 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3931 if (adev->gmc.xgmi.pending_reset)
3932 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3933 msecs_to_jiffies(AMDGPU_RESUME_MS));
3935 amdgpu_device_check_iommu_direct_map(adev);
3940 if (amdgpu_sriov_vf(adev))
3941 amdgpu_virt_release_full_gpu(adev, true);
3943 /* failed in exclusive mode due to timeout */
3944 if (amdgpu_sriov_vf(adev) &&
3945 !amdgpu_sriov_runtime(adev) &&
3946 amdgpu_virt_mmio_blocked(adev) &&
3947 !amdgpu_virt_wait_reset(adev)) {
3948 dev_err(adev->dev, "VF exclusive mode timeout\n");
3949 /* Don't send request since VF is inactive. */
3950 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3951 adev->virt.ops = NULL;
3954 amdgpu_release_ras_context(adev);
3957 amdgpu_vf_error_trans_all(adev);
3962 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3965 /* Clear all CPU mappings pointing to this device */
3966 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3968 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3969 amdgpu_doorbell_fini(adev);
3971 iounmap(adev->rmmio);
3973 if (adev->mman.aper_base_kaddr)
3974 iounmap(adev->mman.aper_base_kaddr);
3975 adev->mman.aper_base_kaddr = NULL;
3977 /* Memory manager related */
3978 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
3979 arch_phys_wc_del(adev->gmc.vram_mtrr);
3980 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3985 * amdgpu_device_fini_hw - tear down the driver
3987 * @adev: amdgpu_device pointer
3989 * Tear down the driver info (all asics).
3990 * Called at driver shutdown.
3992 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
3994 dev_info(adev->dev, "amdgpu: finishing device.\n");
3995 flush_delayed_work(&adev->delayed_init_work);
3996 adev->shutdown = true;
3998 /* make sure the IB tests have finished before entering exclusive mode
3999 * to avoid preemption during the IB tests
4001 if (amdgpu_sriov_vf(adev)) {
4002 amdgpu_virt_request_full_gpu(adev, false);
4003 amdgpu_virt_fini_data_exchange(adev);
4006 /* disable all interrupts */
4007 amdgpu_irq_disable_all(adev);
4008 if (adev->mode_info.mode_config_initialized) {
4009 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4010 drm_helper_force_disable_all(adev_to_drm(adev));
4012 drm_atomic_helper_shutdown(adev_to_drm(adev));
4014 amdgpu_fence_driver_hw_fini(adev);
4016 if (adev->mman.initialized)
4017 drain_workqueue(adev->mman.bdev.wq);
4019 if (adev->pm.sysfs_initialized)
4020 amdgpu_pm_sysfs_fini(adev);
4021 if (adev->ucode_sysfs_en)
4022 amdgpu_ucode_sysfs_fini(adev);
4023 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4024 amdgpu_fru_sysfs_fini(adev);
4026 /* the ras feature must be disabled before hw fini */
4027 amdgpu_ras_pre_fini(adev);
4029 amdgpu_device_ip_fini_early(adev);
4031 amdgpu_irq_fini_hw(adev);
4033 if (adev->mman.initialized)
4034 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4036 amdgpu_gart_dummy_page_fini(adev);
4038 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4039 amdgpu_device_unmap_mmio(adev);
4043 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4048 amdgpu_fence_driver_sw_fini(adev);
4049 amdgpu_device_ip_fini(adev);
4050 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4051 adev->accel_working = false;
4052 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4054 amdgpu_reset_fini(adev);
4056 /* free i2c buses */
4057 if (!amdgpu_device_has_dc_support(adev))
4058 amdgpu_i2c_fini(adev);
4060 if (amdgpu_emu_mode != 1)
4061 amdgpu_atombios_fini(adev);
4066 px = amdgpu_device_supports_px(adev_to_drm(adev));
4068 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4069 apple_gmux_detect(NULL, NULL)))
4070 vga_switcheroo_unregister_client(adev->pdev);
4073 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4075 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4076 vga_client_unregister(adev->pdev);
4078 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4080 iounmap(adev->rmmio);
4082 amdgpu_doorbell_fini(adev);
4086 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4087 amdgpu_pmu_fini(adev);
4088 if (adev->mman.discovery_bin)
4089 amdgpu_discovery_fini(adev);
4091 amdgpu_reset_put_reset_domain(adev->reset_domain);
4092 adev->reset_domain = NULL;
4094 kfree(adev->pci_state);
4099 * amdgpu_device_evict_resources - evict device resources
4100 * @adev: amdgpu device object
4102 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4103 * of the vram memory type. Mainly used for evicting device resources
4107 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4111 /* No need to evict vram on APUs for suspend to ram or s2idle */
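/*
 * APU "VRAM" is carved out of system RAM, whose contents are preserved
 * across suspend-to-ram and s2idle anyway, so evicting it would be
 * pointless work.
 */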
4112 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4115 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4117 DRM_WARN("evicting device resources failed\n");
4125 * amdgpu_device_suspend - initiate device suspend
4127 * @dev: drm dev pointer
4128 * @fbcon: notify the fbdev of suspend
4130 * Puts the hw in the suspend state (all asics).
4131 * Returns 0 for success or an error on failure.
4132 * Called at driver suspend.
4134 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4136 struct amdgpu_device *adev = drm_to_adev(dev);
4139 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4142 adev->in_suspend = true;
4144 /* Evict the majority of BOs before grabbing the full access */
4145 r = amdgpu_device_evict_resources(adev);
4149 if (amdgpu_sriov_vf(adev)) {
4150 amdgpu_virt_fini_data_exchange(adev);
4151 r = amdgpu_virt_request_full_gpu(adev, false);
4156 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4157 DRM_WARN("smart shift update failed\n");
4160 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4162 cancel_delayed_work_sync(&adev->delayed_init_work);
4163 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4165 amdgpu_ras_suspend(adev);
4167 amdgpu_device_ip_suspend_phase1(adev);
4170 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4172 r = amdgpu_device_evict_resources(adev);
4176 amdgpu_fence_driver_hw_fini(adev);
4178 amdgpu_device_ip_suspend_phase2(adev);
4180 if (amdgpu_sriov_vf(adev))
4181 amdgpu_virt_release_full_gpu(adev, false);
4187 * amdgpu_device_resume - initiate device resume
4189 * @dev: drm dev pointer
4190 * @fbcon: notify the fbdev of resume
4192 * Bring the hw back to operating state (all asics).
4193 * Returns 0 for success or an error on failure.
4194 * Called at driver resume.
4196 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4198 struct amdgpu_device *adev = drm_to_adev(dev);
4201 if (amdgpu_sriov_vf(adev)) {
4202 r = amdgpu_virt_request_full_gpu(adev, true);
4207 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4211 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4214 if (amdgpu_device_need_post(adev)) {
4215 r = amdgpu_device_asic_init(adev);
4217 dev_err(adev->dev, "amdgpu asic init failed\n");
4220 r = amdgpu_device_ip_resume(adev);
4223 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4226 amdgpu_fence_driver_hw_init(adev);
4228 r = amdgpu_device_ip_late_init(adev);
4232 queue_delayed_work(system_wq, &adev->delayed_init_work,
4233 msecs_to_jiffies(AMDGPU_RESUME_MS));
4235 if (!adev->in_s0ix) {
4236 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4242 if (amdgpu_sriov_vf(adev)) {
4243 amdgpu_virt_init_data_exchange(adev);
4244 amdgpu_virt_release_full_gpu(adev, true);
4250 /* Make sure IB tests are flushed */
4251 flush_delayed_work(&adev->delayed_init_work);
4254 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4256 amdgpu_ras_resume(adev);
4258 if (adev->mode_info.num_crtc) {
4260 * Most of the connector probing functions try to acquire runtime pm
4261 * refs to ensure that the GPU is powered on when connector polling is
4262 * performed. Since we're calling this from a runtime PM callback,
4263 * trying to acquire rpm refs will cause us to deadlock.
4265 * Since we're guaranteed to be holding the rpm lock, it's safe to
4266 * temporarily disable the rpm helpers so this doesn't deadlock us.
4269 dev->dev->power.disable_depth++;
4271 if (!adev->dc_enabled)
4272 drm_helper_hpd_irq_event(dev);
4274 drm_kms_helper_hotplug_event(dev);
4276 dev->dev->power.disable_depth--;
4279 adev->in_suspend = false;
4281 if (adev->enable_mes)
4282 amdgpu_mes_self_test(adev);
4284 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4285 DRM_WARN("smart shift update failed\n");
4291 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4293 * @adev: amdgpu_device pointer
4295 * The list of all the hardware IPs that make up the asic is walked and
4296 * the check_soft_reset callbacks are run. check_soft_reset determines
4297 * if the asic is still hung or not.
4298 * Returns true if any of the IPs are still in a hung state, false if not.
4300 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4303 bool asic_hang = false;
4305 if (amdgpu_sriov_vf(adev))
4308 if (amdgpu_asic_need_full_reset(adev))
4311 for (i = 0; i < adev->num_ip_blocks; i++) {
4312 if (!adev->ip_blocks[i].status.valid)
4314 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4315 adev->ip_blocks[i].status.hang =
4316 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4317 if (adev->ip_blocks[i].status.hang) {
4318 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4326 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4328 * @adev: amdgpu_device pointer
4330 * The list of all the hardware IPs that make up the asic is walked and the
4331 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4332 * handles any IP specific hardware or software state changes that are
4333 * necessary for a soft reset to succeed.
4334 * Returns 0 on success, negative error code on failure.
4336 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4340 for (i = 0; i < adev->num_ip_blocks; i++) {
4341 if (!adev->ip_blocks[i].status.valid)
4343 if (adev->ip_blocks[i].status.hang &&
4344 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4345 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4355 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4357 * @adev: amdgpu_device pointer
4359 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4360 * reset is necessary to recover.
4361 * Returns true if a full asic reset is required, false if not.
4363 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4367 if (amdgpu_asic_need_full_reset(adev))
4370 for (i = 0; i < adev->num_ip_blocks; i++) {
4371 if (!adev->ip_blocks[i].status.valid)
4373 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4374 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4375 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4376 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4377 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4378 if (adev->ip_blocks[i].status.hang) {
4379 				dev_info(adev->dev, "Some blocks need full reset!\n");
4388 * amdgpu_device_ip_soft_reset - do a soft reset
4390 * @adev: amdgpu_device pointer
4392 * The list of all the hardware IPs that make up the asic is walked and the
4393 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4394  * IP specific hardware or software state changes that are necessary to
4395  * soft reset the IP.
4396 * Returns 0 on success, negative error code on failure.
4398 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4402 for (i = 0; i < adev->num_ip_blocks; i++) {
4403 if (!adev->ip_blocks[i].status.valid)
4405 if (adev->ip_blocks[i].status.hang &&
4406 adev->ip_blocks[i].version->funcs->soft_reset) {
4407 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4417 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4419 * @adev: amdgpu_device pointer
4421 * The list of all the hardware IPs that make up the asic is walked and the
4422 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4423 * handles any IP specific hardware or software state changes that are
4424 * necessary after the IP has been soft reset.
4425 * Returns 0 on success, negative error code on failure.
4427 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4431 for (i = 0; i < adev->num_ip_blocks; i++) {
4432 if (!adev->ip_blocks[i].status.valid)
4434 if (adev->ip_blocks[i].status.hang &&
4435 adev->ip_blocks[i].version->funcs->post_soft_reset)
4436 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
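/*
 * Editor's note: taken together, the four helpers above form one sequence.
 * A condensed sketch of the flow, mirroring the logic actually used in
 * amdgpu_device_pre_asic_reset() further below:
 */
#if 0
if (amdgpu_device_ip_check_soft_reset(adev)) {	/* any block hung? */
	amdgpu_device_ip_pre_soft_reset(adev);	/* quiesce hung blocks */
	r = amdgpu_device_ip_soft_reset(adev);	/* reset them */
	amdgpu_device_ip_post_soft_reset(adev);	/* restore block state */
	if (r || amdgpu_device_ip_check_soft_reset(adev))
		need_full_reset = true;		/* soft reset did not stick */
}
#endif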
4445 * amdgpu_device_recover_vram - Recover some VRAM contents
4447 * @adev: amdgpu_device pointer
4449 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4450 * restore things like GPUVM page tables after a GPU reset where
4451 * the contents of VRAM might be lost.
4454 * 0 on success, negative error code on failure.
4456 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4458 struct dma_fence *fence = NULL, *next = NULL;
4459 struct amdgpu_bo *shadow;
4460 struct amdgpu_bo_vm *vmbo;
4463 if (amdgpu_sriov_runtime(adev))
4464 tmo = msecs_to_jiffies(8000);
4466 tmo = msecs_to_jiffies(100);
4468 dev_info(adev->dev, "recover vram bo from shadow start\n");
4469 mutex_lock(&adev->shadow_list_lock);
4470 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4471 /* If vm is compute context or adev is APU, shadow will be NULL */
4474 shadow = vmbo->shadow;
4476 /* No need to recover an evicted BO */
4477 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4478 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4479 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4482 r = amdgpu_bo_restore_shadow(shadow, &next);
4487 tmo = dma_fence_wait_timeout(fence, false, tmo);
4488 dma_fence_put(fence);
4493 } else if (tmo < 0) {
4501 mutex_unlock(&adev->shadow_list_lock);
4504 tmo = dma_fence_wait_timeout(fence, false, tmo);
4505 dma_fence_put(fence);
4507 if (r < 0 || tmo <= 0) {
4508 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4512 dev_info(adev->dev, "recover vram bo from shadow done\n");
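/*
 * Editor's note: the tmo bookkeeping above relies on the return convention
 * of dma_fence_wait_timeout(): negative means error, 0 means the wait timed
 * out, positive is the remaining timeout in jiffies. A minimal sketch of
 * that pattern:
 */
#if 0
long tmo = msecs_to_jiffies(100);

tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
if (tmo == 0)
	r = -ETIMEDOUT;	/* ran out of time */
else if (tmo < 0)
	r = tmo;	/* wait failed */
/* tmo > 0: fence signaled, tmo jiffies of budget remain for the next wait */
#endif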
4518 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4520 * @adev: amdgpu_device pointer
4521 * @from_hypervisor: request from hypervisor
4523  * Do a VF FLR and reinitialize the ASIC.
4524  * Returns 0 on success, negative error code on failure.
4526 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4527 bool from_hypervisor)
4530 struct amdgpu_hive_info *hive = NULL;
4531 int retry_limit = 0;
4534 amdgpu_amdkfd_pre_reset(adev);
4536 if (from_hypervisor)
4537 r = amdgpu_virt_request_full_gpu(adev, true);
4539 r = amdgpu_virt_reset_gpu(adev);
4542 amdgpu_irq_gpu_reset_resume_helper(adev);
4544 	/* some SW cleanup the VF needs to do before recovery */
4545 amdgpu_virt_post_reset(adev);
4547 /* Resume IP prior to SMC */
4548 r = amdgpu_device_ip_reinit_early_sriov(adev);
4552 amdgpu_virt_init_data_exchange(adev);
4554 r = amdgpu_device_fw_loading(adev);
4558 /* now we are okay to resume SMC/CP/SDMA */
4559 r = amdgpu_device_ip_reinit_late_sriov(adev);
4563 hive = amdgpu_get_xgmi_hive(adev);
4564 /* Update PSP FW topology after reset */
4565 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4566 r = amdgpu_xgmi_update_topology(hive, adev);
4569 amdgpu_put_xgmi_hive(hive);
4572 r = amdgpu_ib_ring_tests(adev);
4574 amdgpu_amdkfd_post_reset(adev);
4578 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4579 amdgpu_inc_vram_lost(adev);
4580 r = amdgpu_device_recover_vram(adev);
4582 amdgpu_virt_release_full_gpu(adev, true);
4584 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4585 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4589 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4596 * amdgpu_device_has_job_running - check if there is any job in mirror list
4598 * @adev: amdgpu_device pointer
4600  * Check if there is any job in the mirror list.
4602 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4605 struct drm_sched_job *job;
4607 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4608 struct amdgpu_ring *ring = adev->rings[i];
4610 if (!ring || !ring->sched.thread)
4613 spin_lock(&ring->sched.job_list_lock);
4614 job = list_first_entry_or_null(&ring->sched.pending_list,
4615 struct drm_sched_job, list);
4616 spin_unlock(&ring->sched.job_list_lock);
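/*
 * Editor's note: a hedged usage sketch for the helper above; the calling
 * context is hypothetical. Recovery paths can bail out early when no ring
 * has anything queued:
 */
#if 0
if (!amdgpu_device_has_job_running(adev)) {
	dev_info(adev->dev, "no pending job, skipping recovery\n");
	return 0;
}
#endif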
4624 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4626 * @adev: amdgpu_device pointer
4628 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4631 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4634 if (amdgpu_gpu_recovery == 0)
4637 /* Skip soft reset check in fatal error mode */
4638 if (!amdgpu_ras_is_poison_mode_supported(adev))
4641 if (amdgpu_sriov_vf(adev))
4644 if (amdgpu_gpu_recovery == -1) {
4645 switch (adev->asic_type) {
4646 #ifdef CONFIG_DRM_AMDGPU_SI
4653 #ifdef CONFIG_DRM_AMDGPU_CIK
4660 case CHIP_CYAN_SKILLFISH:
4670 dev_info(adev->dev, "GPU recovery disabled.\n");
4674 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4679 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4681 dev_info(adev->dev, "GPU mode1 reset\n");
4684 pci_clear_master(adev->pdev);
4686 amdgpu_device_cache_pci_state(adev->pdev);
4688 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4689 dev_info(adev->dev, "GPU smu mode1 reset\n");
4690 ret = amdgpu_dpm_mode1_reset(adev);
4692 dev_info(adev->dev, "GPU psp mode1 reset\n");
4693 ret = psp_gpu_reset(adev);
4697 dev_err(adev->dev, "GPU mode1 reset failed\n");
4699 amdgpu_device_load_pci_state(adev->pdev);
4701 /* wait for asic to come out of reset */
4702 for (i = 0; i < adev->usec_timeout; i++) {
4703 u32 memsize = adev->nbio.funcs->get_memsize(adev);
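		/* an all-ones read means the register path is still in reset */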
4705 if (memsize != 0xffffffff)
4710 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4714 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4715 struct amdgpu_reset_context *reset_context)
4718 struct amdgpu_job *job = NULL;
4719 bool need_full_reset =
4720 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4722 if (reset_context->reset_req_dev == adev)
4723 job = reset_context->job;
4725 if (amdgpu_sriov_vf(adev)) {
4726 /* stop the data exchange thread */
4727 amdgpu_virt_fini_data_exchange(adev);
4730 amdgpu_fence_driver_isr_toggle(adev, true);
4732 /* block all schedulers and reset given job's ring */
4733 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4734 struct amdgpu_ring *ring = adev->rings[i];
4736 if (!ring || !ring->sched.thread)
4739 		/* Clear job fences from the fence driver to avoid force_completion;
4740 		 * leave NULL and the vm flush fences in the fence driver.
4742 amdgpu_fence_driver_clear_job_fences(ring);
4744 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4745 amdgpu_fence_driver_force_completion(ring);
4748 amdgpu_fence_driver_isr_toggle(adev, false);
4751 drm_sched_increase_karma(&job->base);
4753 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4754 /* If reset handler not implemented, continue; otherwise return */
4755 if (r == -EOPNOTSUPP)
4760 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4761 if (!amdgpu_sriov_vf(adev)) {
4763 if (!need_full_reset)
4764 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4766 if (!need_full_reset && amdgpu_gpu_recovery &&
4767 amdgpu_device_ip_check_soft_reset(adev)) {
4768 amdgpu_device_ip_pre_soft_reset(adev);
4769 r = amdgpu_device_ip_soft_reset(adev);
4770 amdgpu_device_ip_post_soft_reset(adev);
4771 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4772 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4773 need_full_reset = true;
4777 if (need_full_reset)
4778 r = amdgpu_device_ip_suspend(adev);
4779 if (need_full_reset)
4780 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4782 clear_bit(AMDGPU_NEED_FULL_RESET,
4783 &reset_context->flags);
4789 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4793 lockdep_assert_held(&adev->reset_domain->sem);
4795 for (i = 0; i < adev->num_regs; i++) {
4796 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4797 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4798 adev->reset_dump_reg_value[i]);
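/*
 * Editor's note: adev->reset_dump_reg_list and adev->num_regs are expected
 * to be populated elsewhere (in this kernel, user-configurable through a
 * debugfs file). A minimal sketch with hypothetical register offsets:
 */
#if 0
static const u32 example_regs[] = { 0x98f8, 0x9834 };	/* hypothetical */

adev->reset_dump_reg_list = kmemdup(example_regs, sizeof(example_regs),
				    GFP_KERNEL);
adev->num_regs = ARRAY_SIZE(example_regs);
#endif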
4804 #ifdef CONFIG_DEV_COREDUMP
4805 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4806 size_t count, void *data, size_t datalen)
4808 struct drm_printer p;
4809 struct amdgpu_device *adev = data;
4810 struct drm_print_iterator iter;
4815 iter.start = offset;
4816 iter.remain = count;
4818 p = drm_coredump_printer(&iter);
4820 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4821 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4822 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4823 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4824 if (adev->reset_task_info.pid)
4825 drm_printf(&p, "process_name: %s PID: %d\n",
4826 adev->reset_task_info.process_name,
4827 adev->reset_task_info.pid);
4829 if (adev->reset_vram_lost)
4830 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4831 if (adev->num_regs) {
4832 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4834 for (i = 0; i < adev->num_regs; i++)
4835 drm_printf(&p, "0x%08x: 0x%08x\n",
4836 adev->reset_dump_reg_list[i],
4837 adev->reset_dump_reg_value[i]);
4840 return count - iter.remain;
4843 static void amdgpu_devcoredump_free(void *data)
4847 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4849 struct drm_device *dev = adev_to_drm(adev);
4851 ktime_get_ts64(&adev->reset_time);
4852 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4853 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
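/*
 * Editor's note: once dev_coredumpm() has run, the dump rendered by
 * amdgpu_devcoredump_read() is exposed by the devcoredump framework under
 * /sys/class/devcoredump/devcd<N>/data. A hedged userspace sketch (the
 * instance number is hypothetical):
 */
#if 0
/* userspace example, not kernel code */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/class/devcoredump/devcd1/data", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
#endif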
4857 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4858 struct amdgpu_reset_context *reset_context)
4860 struct amdgpu_device *tmp_adev = NULL;
4861 bool need_full_reset, skip_hw_reset, vram_lost = false;
4863 	bool gpu_reset_for_dev_remove = false;
4865 /* Try reset handler method first */
4866 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4868 amdgpu_reset_reg_dumps(tmp_adev);
4870 reset_context->reset_device_list = device_list_handle;
4871 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4872 /* If reset handler not implemented, continue; otherwise return */
4873 if (r == -EOPNOTSUPP)
4878 /* Reset handler not implemented, use the default method */
4880 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4881 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4883 gpu_reset_for_dev_remove =
4884 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4885 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4888 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4889 	 * to allow proper link negotiation in FW (within 1 sec)
4891 if (!skip_hw_reset && need_full_reset) {
4892 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4893 /* For XGMI run all resets in parallel to speed up the process */
4894 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4895 tmp_adev->gmc.xgmi.pending_reset = false;
4896 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4899 r = amdgpu_asic_reset(tmp_adev);
4902 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4903 r, adev_to_drm(tmp_adev)->unique);
4908 		/* For XGMI wait for all resets to complete before proceeding */
4910 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4911 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4912 flush_work(&tmp_adev->xgmi_reset_work);
4913 r = tmp_adev->asic_reset_res;
4921 if (!r && amdgpu_ras_intr_triggered()) {
4922 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4923 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4924 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4925 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4928 amdgpu_ras_intr_cleared();
4931 /* Since the mode1 reset affects base ip blocks, the
4932 * phase1 ip blocks need to be resumed. Otherwise there
4933 * will be a BIOS signature error and the psp bootloader
4934 * can't load kdb on the next amdgpu install.
4936 if (gpu_reset_for_dev_remove) {
4937 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4938 amdgpu_device_ip_resume_phase1(tmp_adev);
4943 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4944 if (need_full_reset) {
4946 r = amdgpu_device_asic_init(tmp_adev);
4948 dev_warn(tmp_adev->dev, "asic atom init failed!");
4950 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4952 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4956 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4957 #ifdef CONFIG_DEV_COREDUMP
4958 tmp_adev->reset_vram_lost = vram_lost;
4959 memset(&tmp_adev->reset_task_info, 0,
4960 sizeof(tmp_adev->reset_task_info));
4961 if (reset_context->job && reset_context->job->vm)
4962 tmp_adev->reset_task_info =
4963 reset_context->job->vm->task_info;
4964 amdgpu_reset_capture_coredumpm(tmp_adev);
4967 DRM_INFO("VRAM is lost due to GPU reset!\n");
4968 amdgpu_inc_vram_lost(tmp_adev);
4971 r = amdgpu_device_fw_loading(tmp_adev);
4975 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4980 amdgpu_device_fill_reset_magic(tmp_adev);
4983 		 * Add this ASIC as tracked, as the reset already
4984 		 * completed successfully.
4986 amdgpu_register_gpu_instance(tmp_adev);
4988 if (!reset_context->hive &&
4989 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4990 amdgpu_xgmi_add_device(tmp_adev);
4992 r = amdgpu_device_ip_late_init(tmp_adev);
4996 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
4999 		 * The GPU enters a bad state once the number of faulty
5000 		 * pages detected by ECC reaches the threshold, and RAS
5001 		 * recovery is scheduled next. So add one check here to
5002 		 * break recovery if the bad page threshold has indeed
5003 		 * been exceeded, and remind the user to either retire
5004 		 * this GPU or set a bigger bad_page_threshold value to
5005 		 * work around this the next time the driver is
5006 		 * probed.
5008 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5010 amdgpu_ras_resume(tmp_adev);
5016 /* Update PSP FW topology after reset */
5017 if (reset_context->hive &&
5018 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5019 r = amdgpu_xgmi_update_topology(
5020 reset_context->hive, tmp_adev);
5026 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5027 r = amdgpu_ib_ring_tests(tmp_adev);
5029 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5030 need_full_reset = true;
5037 r = amdgpu_device_recover_vram(tmp_adev);
5039 tmp_adev->asic_reset_res = r;
5043 if (need_full_reset)
5044 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5046 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5050 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5053 switch (amdgpu_asic_reset_method(adev)) {
5054 case AMD_RESET_METHOD_MODE1:
5055 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5057 case AMD_RESET_METHOD_MODE2:
5058 adev->mp1_state = PP_MP1_STATE_RESET;
5061 adev->mp1_state = PP_MP1_STATE_NONE;
5066 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5068 amdgpu_vf_error_trans_all(adev);
5069 adev->mp1_state = PP_MP1_STATE_NONE;
5072 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5074 struct pci_dev *p = NULL;
5076 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5077 adev->pdev->bus->number, 1);
5079 pm_runtime_enable(&(p->dev));
5080 pm_runtime_resume(&(p->dev));
5086 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5088 enum amd_reset_method reset_method;
5089 struct pci_dev *p = NULL;
5093 	 * For now, only BACO and mode1 reset are confirmed to suffer
5094 	 * from the audio issue if the audio device is not properly suspended.
5096 reset_method = amdgpu_asic_reset_method(adev);
5097 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5098 (reset_method != AMD_RESET_METHOD_MODE1))
5101 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5102 adev->pdev->bus->number, 1);
5106 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5109 		 * If we cannot get the audio device autosuspend delay,
5110 		 * a fixed 4S interval is used. Since 3S is the audio
5111 		 * controller's default autosuspend delay setting, the
5112 		 * 4S used here is guaranteed to cover it.
5114 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5116 while (!pm_runtime_status_suspended(&(p->dev))) {
5117 if (!pm_runtime_suspend(&(p->dev)))
5120 if (expires < ktime_get_mono_fast_ns()) {
5121 dev_warn(adev->dev, "failed to suspend display audio\n");
5123 /* TODO: abort the succeeding gpu reset? */
5128 pm_runtime_disable(&(p->dev));
5134 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5136 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5138 #if defined(CONFIG_DEBUG_FS)
5139 if (!amdgpu_sriov_vf(adev))
5140 cancel_work(&adev->reset_work);
5144 cancel_work(&adev->kfd.reset_work);
5146 if (amdgpu_sriov_vf(adev))
5147 cancel_work(&adev->virt.flr_work);
5149 if (con && adev->ras_enabled)
5150 cancel_work(&con->recovery_work);
5155 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5157 * @adev: amdgpu_device pointer
5158  * @job: the job which triggered the hang
5159 * @reset_context: amdgpu reset context pointer
5161  * Attempt to reset the GPU if it has hung (all ASICs):
5162  * attempt a soft reset or a full reset and reinitialize the ASIC.
5163 * Returns 0 for success or an error on failure.
5166 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5167 struct amdgpu_job *job,
5168 struct amdgpu_reset_context *reset_context)
5170 struct list_head device_list, *device_list_handle = NULL;
5171 bool job_signaled = false;
5172 struct amdgpu_hive_info *hive = NULL;
5173 struct amdgpu_device *tmp_adev = NULL;
5175 bool need_emergency_restart = false;
5176 bool audio_suspended = false;
5177 bool gpu_reset_for_dev_remove = false;
5179 gpu_reset_for_dev_remove =
5180 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5181 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5184 * Special case: RAS triggered and full reset isn't supported
5186 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5189 * Flush RAM to disk so that after reboot
5190 	 * the user can read the log and see why the system rebooted.
5192 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5193 DRM_WARN("Emergency reboot.");
5196 emergency_restart();
5199 dev_info(adev->dev, "GPU %s begin!\n",
5200 need_emergency_restart ? "jobs stop":"reset");
5202 if (!amdgpu_sriov_vf(adev))
5203 hive = amdgpu_get_xgmi_hive(adev);
5205 mutex_lock(&hive->hive_lock);
5207 reset_context->job = job;
5208 reset_context->hive = hive;
5210 * Build list of devices to reset.
5211 	 * In case we are in XGMI hive mode, re-sort the device list
5212 	 * to put adev in the first position.
5214 INIT_LIST_HEAD(&device_list);
5215 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5216 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5217 list_add_tail(&tmp_adev->reset_list, &device_list);
5218 if (gpu_reset_for_dev_remove && adev->shutdown)
5219 tmp_adev->shutdown = true;
5221 if (!list_is_first(&adev->reset_list, &device_list))
5222 list_rotate_to_front(&adev->reset_list, &device_list);
5223 device_list_handle = &device_list;
5225 list_add_tail(&adev->reset_list, &device_list);
5226 device_list_handle = &device_list;
5229 /* We need to lock reset domain only once both for XGMI and single device */
5230 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5232 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5234 /* block all schedulers and reset given job's ring */
5235 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5237 amdgpu_device_set_mp1_state(tmp_adev);
5240 		 * Try to put the audio codec into suspend state
5241 		 * before the gpu reset starts.
5243 		 * Because the power domain of the graphics device
5244 		 * is shared with the AZ power domain, without this
5245 		 * we may change the audio hardware from behind
5246 		 * the audio driver's back, which will trigger
5247 		 * some audio codec errors.
5249 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5250 audio_suspended = true;
5252 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5254 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5256 if (!amdgpu_sriov_vf(tmp_adev))
5257 amdgpu_amdkfd_pre_reset(tmp_adev);
5260 		 * Mark these ASICs to be reset as untracked first,
5261 		 * and add them back after the reset has completed.
5263 amdgpu_unregister_gpu_instance(tmp_adev);
5265 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5267 /* disable ras on ALL IPs */
5268 if (!need_emergency_restart &&
5269 amdgpu_device_ip_need_full_reset(tmp_adev))
5270 amdgpu_ras_suspend(tmp_adev);
5272 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5273 struct amdgpu_ring *ring = tmp_adev->rings[i];
5275 if (!ring || !ring->sched.thread)
5278 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5280 if (need_emergency_restart)
5281 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5283 atomic_inc(&tmp_adev->gpu_reset_counter);
5286 if (need_emergency_restart)
5287 goto skip_sched_resume;
5290 * Must check guilty signal here since after this point all old
5291 * HW fences are force signaled.
5293 * job->base holds a reference to parent fence
5295 if (job && dma_fence_is_signaled(&job->hw_fence)) {
5296 job_signaled = true;
5297 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5301 retry: /* Rest of adevs pre asic reset from XGMI hive. */
5302 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5303 if (gpu_reset_for_dev_remove) {
5304 			/* Workaround for ASICs that need to disable SMC first */
5305 amdgpu_device_smu_fini_early(tmp_adev);
5307 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5308 		/* TODO: Should we stop? */
5310 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5311 r, adev_to_drm(tmp_adev)->unique);
5312 tmp_adev->asic_reset_res = r;
5316 		 * Drop all pending non-scheduler resets. Scheduler resets
5317 		 * were already dropped during drm_sched_stop.
5319 amdgpu_device_stop_pending_resets(tmp_adev);
5322 /* Actual ASIC resets if needed.*/
5323 /* Host driver will handle XGMI hive reset for SRIOV */
5324 if (amdgpu_sriov_vf(adev)) {
5325 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5327 adev->asic_reset_res = r;
5329 		/* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5330 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5331 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
5332 amdgpu_ras_resume(adev);
5334 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5335 if (r && r == -EAGAIN)
5338 if (!r && gpu_reset_for_dev_remove)
5344 	/* Post ASIC reset for all devices. */
5345 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5347 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5348 struct amdgpu_ring *ring = tmp_adev->rings[i];
5350 if (!ring || !ring->sched.thread)
5353 drm_sched_start(&ring->sched, true);
5356 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
5357 amdgpu_mes_self_test(tmp_adev);
5359 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5360 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5362 if (tmp_adev->asic_reset_res)
5363 r = tmp_adev->asic_reset_res;
5365 tmp_adev->asic_reset_res = 0;
5368 			/* bad news, how do we tell userspace? */
5369 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5370 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5372 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5373 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5374 DRM_WARN("smart shift update failed\n");
5379 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5380 /* unlock kfd: SRIOV would do it separately */
5381 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5382 amdgpu_amdkfd_post_reset(tmp_adev);
5384 		/* kfd_post_reset will do nothing if the kfd device is not initialized;
5385 		 * we need to bring up kfd here if it was not initialized before.
5387 if (!adev->kfd.init_complete)
5388 amdgpu_amdkfd_device_init(adev);
5390 if (audio_suspended)
5391 amdgpu_device_resume_display_audio(tmp_adev);
5393 amdgpu_device_unset_mp1_state(tmp_adev);
5395 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5399 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5401 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5404 mutex_unlock(&hive->hive_lock);
5405 amdgpu_put_xgmi_hive(hive);
5409 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5411 atomic_set(&adev->reset_domain->reset_res, r);
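/*
 * Editor's note: a hedged sketch of a caller. The job timeout handler
 * builds an amdgpu_reset_context and hands it to
 * amdgpu_device_gpu_recover(); this is condensed from that pattern, not
 * copied verbatim.
 */
#if 0
struct amdgpu_reset_context reset_context;

memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;	/* let the core choose */
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

r = amdgpu_device_gpu_recover(adev, job, &reset_context);
#endif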
5416  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5418 * @adev: amdgpu_device pointer
5420  * Fetches and stores in the driver the PCIE capabilities (gen speed
5421 * and lanes) of the slot the device is in. Handles APUs and
5422 * virtualized environments where PCIE config space may not be available.
5424 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5426 struct pci_dev *pdev;
5427 enum pci_bus_speed speed_cap, platform_speed_cap;
5428 enum pcie_link_width platform_link_width;
5430 if (amdgpu_pcie_gen_cap)
5431 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5433 if (amdgpu_pcie_lane_cap)
5434 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5436 /* covers APUs as well */
5437 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5438 if (adev->pm.pcie_gen_mask == 0)
5439 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5440 if (adev->pm.pcie_mlw_mask == 0)
5441 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5445 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5448 pcie_bandwidth_available(adev->pdev, NULL,
5449 &platform_speed_cap, &platform_link_width);
5451 if (adev->pm.pcie_gen_mask == 0) {
5454 speed_cap = pcie_get_speed_cap(pdev);
5455 if (speed_cap == PCI_SPEED_UNKNOWN) {
5456 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5457 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5458 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5460 if (speed_cap == PCIE_SPEED_32_0GT)
5461 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5462 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5464 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5465 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5466 else if (speed_cap == PCIE_SPEED_16_0GT)
5467 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5469 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5470 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5471 else if (speed_cap == PCIE_SPEED_8_0GT)
5472 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5473 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5474 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5475 else if (speed_cap == PCIE_SPEED_5_0GT)
5476 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5477 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5479 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5482 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5483 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5486 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5487 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5489 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5490 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5492 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5493 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5494 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5495 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5496 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5497 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5498 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5499 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5500 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5501 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5502 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5503 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5505 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5509 if (adev->pm.pcie_mlw_mask == 0) {
5510 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5511 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5513 switch (platform_link_width) {
5515 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5519 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5524 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5526 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5527 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5532 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5533 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5534 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5535 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5539 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5542 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5545 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5546 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5547 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5550 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5551 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5554 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
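/*
 * Editor's note: a minimal sketch of consuming the masks computed above.
 * Power-management code tests individual CAIL_* bits when picking a link
 * speed and width, e.g.:
 */
#if 0
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
	;	/* the platform can run the link at PCIe 3.0 (8.0 GT/s) */
if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
	;	/* a x16 link width is supported */
#endif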
5564 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5566 * @adev: amdgpu_device pointer
5567 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5569 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5570 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5573 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5574 struct amdgpu_device *peer_adev)
5576 #ifdef CONFIG_HSA_AMD_P2P
5577 uint64_t address_mask = peer_adev->dev->dma_mask ?
5578 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5579 resource_size_t aper_limit =
5580 adev->gmc.aper_base + adev->gmc.aper_size - 1;
5582 !adev->gmc.xgmi.connected_to_cpu &&
5583 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5585 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5586 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5587 !(adev->gmc.aper_base & address_mask ||
5588 aper_limit & address_mask));
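/*
 * Editor's note: a worked example of the check above, with hypothetical
 * numbers. A GPU with 16 GiB of VRAM is "large BAR" only if all 16 GiB
 * are CPU-visible (real_vram_size == visible_vram_size). The BAR must
 * also sit below the peer's DMA limit: with a 44-bit peer dma_mask,
 * address_mask is ~((1ULL << 44) - 1), so neither aper_base nor
 * aper_limit may have any bits set above bit 43.
 */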
5594 int amdgpu_device_baco_enter(struct drm_device *dev)
5596 struct amdgpu_device *adev = drm_to_adev(dev);
5597 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5599 if (!amdgpu_device_supports_baco(dev))
5602 if (ras && adev->ras_enabled &&
5603 adev->nbio.funcs->enable_doorbell_interrupt)
5604 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5606 return amdgpu_dpm_baco_enter(adev);
5609 int amdgpu_device_baco_exit(struct drm_device *dev)
5611 struct amdgpu_device *adev = drm_to_adev(dev);
5612 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5615 if (!amdgpu_device_supports_baco(dev))
5618 ret = amdgpu_dpm_baco_exit(adev);
5622 if (ras && adev->ras_enabled &&
5623 adev->nbio.funcs->enable_doorbell_interrupt)
5624 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5626 if (amdgpu_passthrough(adev) &&
5627 adev->nbio.funcs->clear_doorbell_interrupt)
5628 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5634 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5635 * @pdev: PCI device struct
5636 * @state: PCI channel state
5638 * Description: Called when a PCI error is detected.
5640 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5642 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5644 struct drm_device *dev = pci_get_drvdata(pdev);
5645 struct amdgpu_device *adev = drm_to_adev(dev);
5648 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5650 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5651 DRM_WARN("No support for XGMI hive yet...");
5652 return PCI_ERS_RESULT_DISCONNECT;
5655 adev->pci_channel_state = state;
5658 case pci_channel_io_normal:
5659 return PCI_ERS_RESULT_CAN_RECOVER;
5660 /* Fatal error, prepare for slot reset */
5661 case pci_channel_io_frozen:
5663 * Locking adev->reset_domain->sem will prevent any external access
5664 * to GPU during PCI error recovery
5666 amdgpu_device_lock_reset_domain(adev->reset_domain);
5667 amdgpu_device_set_mp1_state(adev);
5670 * Block any work scheduling as we do for regular GPU reset
5671 * for the duration of the recovery
5673 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5674 struct amdgpu_ring *ring = adev->rings[i];
5676 if (!ring || !ring->sched.thread)
5679 drm_sched_stop(&ring->sched, NULL);
5681 atomic_inc(&adev->gpu_reset_counter);
5682 return PCI_ERS_RESULT_NEED_RESET;
5683 case pci_channel_io_perm_failure:
5684 /* Permanent error, prepare for device removal */
5685 return PCI_ERS_RESULT_DISCONNECT;
5688 return PCI_ERS_RESULT_NEED_RESET;
5692 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5693 * @pdev: pointer to PCI device
5695 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5698 DRM_INFO("PCI error: mmio enabled callback!!\n");
5700 /* TODO - dump whatever for debugging purposes */
5702 	/* This is called only if amdgpu_pci_error_detected returns
5703 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5704 * works, no need to reset slot.
5707 return PCI_ERS_RESULT_RECOVERED;
5711 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5712 * @pdev: PCI device struct
5714 * Description: This routine is called by the pci error recovery
5715 * code after the PCI slot has been reset, just before we
5716 * should resume normal operations.
5718 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5720 struct drm_device *dev = pci_get_drvdata(pdev);
5721 struct amdgpu_device *adev = drm_to_adev(dev);
5723 struct amdgpu_reset_context reset_context;
5725 struct list_head device_list;
5727 DRM_INFO("PCI error: slot reset callback!!\n");
5729 memset(&reset_context, 0, sizeof(reset_context));
5731 INIT_LIST_HEAD(&device_list);
5732 list_add_tail(&adev->reset_list, &device_list);
5734 /* wait for asic to come out of reset */
5737 /* Restore PCI confspace */
5738 amdgpu_device_load_pci_state(pdev);
5740 /* confirm ASIC came out of reset */
5741 for (i = 0; i < adev->usec_timeout; i++) {
5742 memsize = amdgpu_asic_get_config_memsize(adev);
5744 if (memsize != 0xffffffff)
5748 if (memsize == 0xffffffff) {
5753 reset_context.method = AMD_RESET_METHOD_NONE;
5754 reset_context.reset_req_dev = adev;
5755 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5756 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5758 adev->no_hw_access = true;
5759 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5760 adev->no_hw_access = false;
5764 r = amdgpu_do_asic_reset(&device_list, &reset_context);
5768 if (amdgpu_device_cache_pci_state(adev->pdev))
5769 pci_restore_state(adev->pdev);
5771 DRM_INFO("PCIe error recovery succeeded\n");
5773 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5774 amdgpu_device_unset_mp1_state(adev);
5775 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5778 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5782 * amdgpu_pci_resume() - resume normal ops after PCI reset
5783 * @pdev: pointer to PCI device
5785  * Called when the error recovery driver tells us that it's
5786 * OK to resume normal operation.
5788 void amdgpu_pci_resume(struct pci_dev *pdev)
5790 struct drm_device *dev = pci_get_drvdata(pdev);
5791 struct amdgpu_device *adev = drm_to_adev(dev);
5795 DRM_INFO("PCI error: resume callback!!\n");
5797 /* Only continue execution for the case of pci_channel_io_frozen */
5798 if (adev->pci_channel_state != pci_channel_io_frozen)
5801 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5802 struct amdgpu_ring *ring = adev->rings[i];
5804 if (!ring || !ring->sched.thread)
5807 drm_sched_start(&ring->sched, true);
5810 amdgpu_device_unset_mp1_state(adev);
5811 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5814 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5816 struct drm_device *dev = pci_get_drvdata(pdev);
5817 struct amdgpu_device *adev = drm_to_adev(dev);
5820 r = pci_save_state(pdev);
5822 kfree(adev->pci_state);
5824 adev->pci_state = pci_store_saved_state(pdev);
5826 if (!adev->pci_state) {
5827 DRM_ERROR("Failed to store PCI saved state");
5831 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5838 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5840 struct drm_device *dev = pci_get_drvdata(pdev);
5841 struct amdgpu_device *adev = drm_to_adev(dev);
5844 if (!adev->pci_state)
5847 r = pci_load_saved_state(pdev, adev->pci_state);
5850 pci_restore_state(pdev);
5852 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5859 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5860 struct amdgpu_ring *ring)
5862 #ifdef CONFIG_X86_64
5863 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5866 if (adev->gmc.xgmi.connected_to_cpu)
5869 if (ring && ring->funcs->emit_hdp_flush)
5870 amdgpu_ring_emit_hdp_flush(ring);
5872 amdgpu_asic_flush_hdp(adev, ring);
5875 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5876 struct amdgpu_ring *ring)
5878 #ifdef CONFIG_X86_64
5879 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5882 if (adev->gmc.xgmi.connected_to_cpu)
5885 amdgpu_asic_invalidate_hdp(adev, ring);
5888 int amdgpu_in_reset(struct amdgpu_device *adev)
5890 return atomic_read(&adev->reset_domain->in_gpu_reset);
5894 * amdgpu_device_halt() - bring hardware to some kind of halt state
5896 * @adev: amdgpu_device pointer
5898 * Bring hardware to some kind of halt state so that no one can touch it
5899  * any more. It helps to maintain the error context when an error occurs.
5900  * Compared to a simple hang, the system stays stable at least for SSH
5901  * access. Then it should be trivial to inspect the hardware state and
5902  * see what's going on. Implemented as follows:
5904  * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc.),
5905 * clears all CPU mappings to device, disallows remappings through page faults
5906 * 2. amdgpu_irq_disable_all() disables all interrupts
5907 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5908  * 4. set adev->no_hw_access to avoid potential crashes after step 5
5909 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5910 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5911 * flush any in flight DMA operations
5913 void amdgpu_device_halt(struct amdgpu_device *adev)
5915 struct pci_dev *pdev = adev->pdev;
5916 struct drm_device *ddev = adev_to_drm(adev);
5918 amdgpu_xcp_dev_unplug(adev);
5919 drm_dev_unplug(ddev);
5921 amdgpu_irq_disable_all(adev);
5923 amdgpu_fence_driver_hw_fini(adev);
5925 adev->no_hw_access = true;
5927 amdgpu_device_unmap_mmio(adev);
5929 pci_disable_device(pdev);
5930 pci_wait_for_pending_transaction(pdev);
5933 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5936 unsigned long flags, address, data;
5939 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5940 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5942 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5943 WREG32(address, reg * 4);
5944 (void)RREG32(address);
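	/* read back to post the index write before the data access */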
5946 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5950 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5953 unsigned long flags, address, data;
5955 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5956 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5958 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5959 WREG32(address, reg * 4);
5960 (void)RREG32(address);
5963 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5967 * amdgpu_device_switch_gang - switch to a new gang
5968 * @adev: amdgpu_device pointer
5969 * @gang: the gang to switch to
5971 * Try to switch to a new gang.
5972 * Returns: NULL if we switched to the new gang or a reference to the current
5975 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5976 struct dma_fence *gang)
5978 struct dma_fence *old = NULL;
5983 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5989 if (!dma_fence_is_signaled(old))
5992 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5999 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6001 switch (adev->asic_type) {
6002 #ifdef CONFIG_DRM_AMDGPU_SI
6006 /* chips with no display hardware */
6008 #ifdef CONFIG_DRM_AMDGPU_SI
6014 #ifdef CONFIG_DRM_AMDGPU_CIK
6023 case CHIP_POLARIS10:
6024 case CHIP_POLARIS11:
6025 case CHIP_POLARIS12:
6029 /* chips with display hardware */
6033 if (!adev->ip_versions[DCE_HWIP][0] ||
6034 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6040 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6041 uint32_t inst, uint32_t reg_addr, char reg_name[],
6042 uint32_t expected_value, uint32_t mask)
6046 uint32_t tmp_ = RREG32(reg_addr);
6047 uint32_t loop = adev->usec_timeout;
6049 while ((tmp_ & (mask)) != (expected_value)) {
6051 loop = adev->usec_timeout;
6055 tmp_ = RREG32(reg_addr);
6058 		DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6059 inst, reg_name, (uint32_t)expected_value,
6060 (uint32_t)(tmp_ & (mask)));
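/*
 * Editor's note: a hedged usage sketch for the polling helper above; the
 * register, instance and mask names are hypothetical.
 */
#if 0
u32 val = amdgpu_device_wait_on_rreg(adev, 0, mmMY_IP_STATUS,
				     "MY_IP_STATUS",
				     MY_IP_STATUS__IDLE_MASK,	/* expected */
				     MY_IP_STATUS__IDLE_MASK);	/* mask */
#endif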