/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"

#define MQD_SIZE_ALIGNED 768

/*
 * kfd_locked is used to lock the kfd driver during suspend or reset;
 * once locked, the kfd driver will stop any further GPU execution.
 * Process creation (open) will return -EAGAIN.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);

#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
#endif
extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
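
/*
 * Per-ASIC kfd2kgd callback tables, indexed by amdgpu asic_type.
 * Families compiled out (CIK, or the IOMMUv2-only APUs) are left NULL
 * here and are rejected by kgd2kfd_probe().
 */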
static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
	[CHIP_KAVERI] = &gfx_v7_kfd2kgd,
#endif
	[CHIP_CARRIZO] = &gfx_v8_kfd2kgd,
	[CHIP_RAVEN] = &gfx_v9_kfd2kgd,
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	[CHIP_HAWAII] = &gfx_v7_kfd2kgd,
#endif
	[CHIP_TONGA] = &gfx_v8_kfd2kgd,
	[CHIP_FIJI] = &gfx_v8_kfd2kgd,
	[CHIP_POLARIS10] = &gfx_v8_kfd2kgd,
	[CHIP_POLARIS11] = &gfx_v8_kfd2kgd,
	[CHIP_POLARIS12] = &gfx_v8_kfd2kgd,
	[CHIP_VEGAM] = &gfx_v8_kfd2kgd,
	[CHIP_VEGA10] = &gfx_v9_kfd2kgd,
	[CHIP_VEGA12] = &gfx_v9_kfd2kgd,
	[CHIP_VEGA20] = &gfx_v9_kfd2kgd,
	[CHIP_RENOIR] = &gfx_v9_kfd2kgd,
	[CHIP_ARCTURUS] = &arcturus_kfd2kgd,
	[CHIP_ALDEBARAN] = &aldebaran_kfd2kgd,
	[CHIP_NAVI10] = &gfx_v10_kfd2kgd,
	[CHIP_NAVI12] = &gfx_v10_kfd2kgd,
	[CHIP_NAVI14] = &gfx_v10_kfd2kgd,
	[CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd,
	[CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd,
	[CHIP_VANGOGH] = &gfx_v10_3_kfd2kgd,
	[CHIP_DIMGREY_CAVEFISH] = &gfx_v10_3_kfd2kgd,
	[CHIP_BEIGE_GOBY] = &gfx_v10_3_kfd2kgd,
};

#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
	.asic_family = CHIP_KAVERI,
	.asic_name = "kaveri",
	.max_pasid_bits = 16,
	/* max num of queues for KV. TODO: should be a dynamic value */
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = true,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info carrizo_device_info = {
	.asic_family = CHIP_CARRIZO,
	.asic_name = "carrizo",
	.max_pasid_bits = 16,
	/* max num of queues for CZ. TODO: should be a dynamic value */
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = true,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};
#endif

static const struct kfd_device_info raven_device_info = {
	.asic_family = CHIP_RAVEN,
	.asic_name = "raven",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 1,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info hawaii_device_info = {
	.asic_family = CHIP_HAWAII,
	.asic_name = "hawaii",
	.max_pasid_bits = 16,
	/* max num of queues for KV. TODO: should be a dynamic value */
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info tonga_device_info = {
	.asic_family = CHIP_TONGA,
	.asic_name = "tonga",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_device_info = {
	.asic_family = CHIP_FIJI,
	.asic_name = "fiji",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_vf_device_info = {
	.asic_family = CHIP_FIJI,
	.asic_name = "fiji",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_device_info = {
	.asic_family = CHIP_POLARIS10,
	.asic_name = "polaris10",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_vf_device_info = {
	.asic_family = CHIP_POLARIS10,
	.asic_name = "polaris10",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris11_device_info = {
	.asic_family = CHIP_POLARIS11,
	.asic_name = "polaris11",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris12_device_info = {
	.asic_family = CHIP_POLARIS12,
	.asic_name = "polaris12",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vegam_device_info = {
	.asic_family = CHIP_VEGAM,
	.asic_name = "vegam",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_device_info = {
	.asic_family = CHIP_VEGA10,
	.asic_name = "vega10",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_vf_device_info = {
	.asic_family = CHIP_VEGA10,
	.asic_name = "vega10",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega12_device_info = {
	.asic_family = CHIP_VEGA12,
	.asic_name = "vega12",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega20_device_info = {
	.asic_family = CHIP_VEGA20,
	.asic_name = "vega20",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info arcturus_device_info = {
	.asic_family = CHIP_ARCTURUS,
	.asic_name = "arcturus",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 6,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info aldebaran_device_info = {
	.asic_family = CHIP_ALDEBARAN,
	.asic_name = "aldebaran",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 3,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info renoir_device_info = {
	.asic_family = CHIP_RENOIR,
	.asic_name = "renoir",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 1,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info navi10_device_info = {
	.asic_family = CHIP_NAVI10,
	.asic_name = "navi10",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi12_device_info = {
	.asic_family = CHIP_NAVI12,
	.asic_name = "navi12",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi14_device_info = {
	.asic_family = CHIP_NAVI14,
	.asic_name = "navi14",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info sienna_cichlid_device_info = {
	.asic_family = CHIP_SIENNA_CICHLID,
	.asic_name = "sienna_cichlid",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 4,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navy_flounder_device_info = {
	.asic_family = CHIP_NAVY_FLOUNDER,
	.asic_name = "navy_flounder",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info vangogh_device_info = {
	.asic_family = CHIP_VANGOGH,
	.asic_name = "vangogh",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = false,
	.num_sdma_engines = 1,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info dimgrey_cavefish_device_info = {
	.asic_family = CHIP_DIMGREY_CAVEFISH,
	.asic_name = "dimgrey_cavefish",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info beige_goby_device_info = {
	.asic_family = CHIP_BEIGE_GOBY,
	.asic_name = "beige_goby",
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_engines = 1,
	.num_xgmi_sdma_engines = 0,
	.num_sdma_queues_per_engine = 8,
};

/* For each entry, [0] is regular and [1] is virtualisation device. */
static const struct kfd_device_info *kfd_supported_devices[][2] = {
#ifdef KFD_SUPPORT_IOMMU_V2
	[CHIP_KAVERI] = {&kaveri_device_info, NULL},
	[CHIP_CARRIZO] = {&carrizo_device_info, NULL},
#endif
	[CHIP_RAVEN] = {&raven_device_info, NULL},
	[CHIP_HAWAII] = {&hawaii_device_info, NULL},
	[CHIP_TONGA] = {&tonga_device_info, NULL},
	[CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info},
	[CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info},
	[CHIP_POLARIS11] = {&polaris11_device_info, NULL},
	[CHIP_POLARIS12] = {&polaris12_device_info, NULL},
	[CHIP_VEGAM] = {&vegam_device_info, NULL},
	[CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info},
	[CHIP_VEGA12] = {&vega12_device_info, NULL},
	[CHIP_VEGA20] = {&vega20_device_info, NULL},
	[CHIP_RENOIR] = {&renoir_device_info, NULL},
	[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
	[CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info},
	[CHIP_NAVI10] = {&navi10_device_info, NULL},
	[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
	[CHIP_NAVI14] = {&navi14_device_info, NULL},
	[CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
	[CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info},
	[CHIP_VANGOGH] = {&vangogh_device_info, NULL},
	[CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info},
	[CHIP_BEIGE_GOBY] = {&beige_goby_device_info, &beige_goby_device_info},
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);
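
/*
 * kgd2kfd_probe() only validates the ASIC against the tables above and
 * allocates the kfd_dev; hardware and software initialization is
 * deferred to kgd2kfd_device_init().
 */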
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
	struct pci_dev *pdev, unsigned int asic_type, bool vf)
{
	struct kfd_dev *kfd;
	const struct kfd_device_info *device_info;
	const struct kfd2kgd_calls *f2g;

	if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
		|| asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
		dev_err(kfd_device, "asic_type %d out of range\n", asic_type);
		return NULL; /* asic_type out of range */
	}

	device_info = kfd_supported_devices[asic_type][vf];
	f2g = kfd2kgd_funcs[asic_type];

	if (!device_info || !f2g) {
		dev_err(kfd_device, "%s %s not supported in kfd\n",
			amdgpu_asic_name[asic_type], vf ? "VF" : "");
		return NULL;
	}

	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
	 * 32 and 64-bit requests are possible and must be
	 * supported.
	 */
	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
	if (device_info->needs_pci_atomics &&
	    !kfd->pci_atomic_requested) {
		dev_err(kfd_device,
			"skipped device %x:%x, PCI rejects atomics\n",
			pdev->vendor, pdev->device);
		kfree(kfd);
		return NULL;
	}

	kfd->kgd = kgd;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;
	atomic_set(&kfd->compute_profile, 0);

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));

	atomic_set(&kfd->sram_ecc_flag, 0);

	ida_init(&kfd->doorbell_ida);

	return kfd;
}
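
/*
 * Pick the CWSR (compute wave save/restore) trap handler ISA matching
 * the ASIC family. Each embedded handler image must fit in a single
 * page, hence the BUILD_BUG_ON checks below.
 */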
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
		if (kfd->device_info->asic_family < CHIP_VEGA10) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		} else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
		} else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) {
			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
		} else if (kfd->device_info->asic_family < CHIP_NAVI10) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
		} else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
		} else {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
		}

		kfd->cwsr_enabled = true;
	}
}
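
/*
 * GWS (global wave sync) is only usable with the HW scheduler and a
 * MEC2 firmware that supports it; the version checks below list the
 * first firmware per family believed to do so, with hws_gws_support
 * acting as a module-parameter override.
 */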
static int kfd_gws_init(struct kfd_dev *kfd)
{
	int ret = 0;

	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
		return 0;

	if (hws_gws_support
		|| (kfd->device_info->asic_family == CHIP_VEGA10
			&& kfd->mec2_fw_version >= 0x81b3)
		|| (kfd->device_info->asic_family >= CHIP_VEGA12
			&& kfd->device_info->asic_family <= CHIP_RAVEN
			&& kfd->mec2_fw_version >= 0x1b3)
		|| (kfd->device_info->asic_family == CHIP_ARCTURUS
			&& kfd->mec2_fw_version >= 0x30)
		|| (kfd->device_info->asic_family == CHIP_ALDEBARAN
			&& kfd->mec2_fw_version >= 0x28))
		ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
			amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);

	return ret;
}

static void kfd_smi_init(struct kfd_dev *dev)
{
	INIT_LIST_HEAD(&dev->smi_clients);
	spin_lock_init(&dev->smi_lock);
}

bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size, map_process_packet_size;

	kfd->ddev = ddev;
	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
			KGD_ENGINE_MEC1);
	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
			KGD_ENGINE_MEC2);
	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
			KGD_ENGINE_SDMA1);
	kfd->shared_resources = *gpu_resources;

	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

	/* Verify module parameters regarding mapped process number */
	if ((hws_max_conc_proc < 0)
			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
		dev_err(kfd_device,
			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
			kfd->vm_info.vmid_num_kfd);
		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
	} else
		kfd->max_proc_per_quantum = hws_max_conc_proc;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;

	/*
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
	map_process_packet_size =
			kfd->device_info->asic_family == CHIP_ALDEBARAN ?
				sizeof(struct pm4_mes_map_process_aldebaran) :
					sizeof(struct pm4_mes_map_process);
	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;

	/* Add size of HIQ & DIQ */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* add another 512KB for all other allocations on gart (HPD, fences) */
	size += 512 * 1024;

	if (amdgpu_amdkfd_alloc_gtt_mem(
			kfd->kgd, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
			false)) {
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
		goto alloc_gtt_mem_failure;
	}

	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

	/* Initialize GTT sa with 512 byte chunk size */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}

	kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);

	kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);

	if (kfd_interrupt_init(kfd)) {
		dev_err(kfd_device, "Error initializing interrupts\n");
		goto kfd_interrupt_error;
	}

	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	/* If supported on this device, allocate global GWS that is shared
	 * by all KFD processes
	 */
	if (kfd_gws_init(kfd)) {
		dev_err(kfd_device, "Could not allocate %d gws\n",
			amdgpu_amdkfd_get_num_gws(kfd->kgd));
		goto gws_error;
	}

	/* If CRAT is broken, won't set iommu enabled */
	kfd_double_confirm_iommu_support(kfd);

	if (kfd_iommu_device_init(kfd)) {
		dev_err(kfd_device, "Error initializing iommuv2\n");
		goto device_iommu_error;
	}

	kfd_cwsr_init(kfd);

	svm_migrate_init((struct amdgpu_device *)kfd->kgd);

	if (kfd_resume(kfd))
		goto kfd_resume_error;

	if (kfd_topology_add_device(kfd)) {
		dev_err(kfd_device, "Error adding device to topology\n");
		goto kfd_topology_add_device_error;
	}

	kfd_smi_init(kfd);

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
		 kfd->pdev->device);

	pr_debug("Starting kfd with the following scheduling policy %d\n",
		kfd->dqm->sched_policy);

	goto out;

kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
gws_error:
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
alloc_gtt_mem_failure:
	if (kfd->gws)
		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
	dev_err(kfd_device,
		"device %x:%x NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		kgd2kfd_suspend(kfd, false);
		svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
		ida_destroy(&kfd->doorbell_ida);
		kfd_gtt_sa_fini(kfd);
		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
		if (kfd->gws)
			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
	}

	kfree(kfd);
}

int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;

	kfd_smi_event_update_gpu_reset(kfd, false);

	kfd->dqm->ops.pre_reset(kfd->dqm);

	kgd2kfd_suspend(kfd, false);

	kfd_signal_reset_event(kfd);
	return 0;
}

/*
 * Fix me. KFD won't be able to resume existing processes for now.
 * All existing processes are kept in an evicted state and we wait
 * for them to terminate.
 */
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	int ret;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;
	atomic_dec(&kfd_locked);

	atomic_set(&kfd->sram_ecc_flag, 0);

	kfd_smi_event_update_gpu_reset(kfd, true);

	return 0;
}

bool kfd_is_locked(void)
{
	return (atomic_read(&kfd_locked) > 0);
}
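
/*
 * kfd_locked counts suspended KFD devices: all KFD processes are
 * evicted when the first device suspends and restored once the last
 * one resumes. Runtime PM (run_pm) skips this process eviction.
 */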
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	if (!kfd->init_complete)
		return;

	/* for runtime suspend, skip locking kfd */
	if (!run_pm) {
		/* For first KFD device suspend all the KFD processes */
		if (atomic_inc_return(&kfd_locked) == 1)
			kfd_suspend_all_processes();
	}

	kfd->dqm->ops.stop(kfd->dqm);
	kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	int ret, count;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;

	/* for runtime resume, skip unlocking kfd */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
	}

	return ret;
}

static int kfd_resume(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd_iommu_resume(kfd);
	if (err) {
		dev_err(kfd_device,
			"Failed to resume IOMMU for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		return err;
	}

	err = kfd->dqm->ops.start(kfd->dqm);
	if (err) {
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto dqm_start_error;
	}

	return err;

dqm_start_error:
	kfd_iommu_suspend(kfd);
	return err;
}
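
/*
 * Queue interrupt work on a CPU close to the interrupted one: starting
 * from the current CPU, pick the next online CPU on the local NUMA
 * node (wrapping at nr_cpu_ids) and queue the work there.
 */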
static inline void kfd_queue_work(struct workqueue_struct *wq,
				  struct work_struct *work)
{
	int cpu, new_cpu;

	cpu = new_cpu = smp_processor_id();
	do {
		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
		if (cpu_to_node(new_cpu) == numa_node_id())
			break;
	} while (cpu != new_cpu);

	queue_work_on(new_cpu, wq, work);
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
	bool is_patched = false;
	unsigned long flags;

	if (!kfd->init_complete)
		return;

	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}

	spin_lock_irqsave(&kfd->interrupt_lock, flags);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry,
				   patched_ihre, &is_patched)
	    && enqueue_ih_ring_entry(kfd,
				     is_patched ? patched_ihre : ih_ring_entry))
		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);

	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm)
{
	struct kfd_process *p;
	int r;

	/* Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	r = kfd_process_evict_queues(p);

	kfd_unref_process(p);
	return r;
}

int kgd2kfd_resume_mm(struct mm_struct *mm)
{
	struct kfd_process *p;
	int r;

	/* Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	r = kfd_process_restore_queues(p);

	kfd_unref_process(p);
	return r;
}

/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
 *   prepare for safe eviction of KFD BOs that belong to the specified
 *   process.
 *
 * @mm: mm_struct that identifies the specified KFD process
 * @fence: eviction fence attached to KFD process BOs
 *
 */
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence)
{
	struct kfd_process *p;
	unsigned long active_time;
	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

	if (!fence)
		return -EINVAL;

	if (dma_fence_is_signaled(fence))
		return 0;

	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ENODEV;

	if (fence->seqno == p->last_eviction_seqno)
		goto out;

	p->last_eviction_seqno = fence->seqno;

	/* Avoid KFD process starvation. Wait for at least
	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
	 */
	active_time = get_jiffies_64() - p->last_restore_timestamp;
	if (delay_jiffies > active_time)
		delay_jiffies -= active_time;
	else
		delay_jiffies = 0;

	/* During process initialization eviction_work.dwork is initialized
	 * to kfd_evict_bo_worker
	 */
	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
	     p->lead_thread->pid, delay_jiffies);
	schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
	kfd_unref_process(p);
	return 0;
}
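
/*
 * GTT sub-allocator: carves the one-time GTT buffer allocated in
 * kgd2kfd_device_init() into fixed-size chunks, tracked with a single
 * bitmap (one bit per chunk) under gtt_sa_lock.
 */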
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size)
{
	unsigned int num_of_longs;

	if (WARN_ON(buf_size < chunk_size))
		return -EINVAL;
	if (WARN_ON(buf_size == 0))
		return -EINVAL;
	if (WARN_ON(chunk_size == 0))
		return -EINVAL;

	kfd->gtt_sa_chunk_size = chunk_size;
	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

	num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
		BITS_PER_LONG;

	kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);

	if (!kfd->gtt_sa_bitmap)
		return -ENOMEM;

	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

	mutex_init(&kfd->gtt_sa_lock);

	return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	mutex_destroy(&kfd->gtt_sa_lock);
	kfree(kfd->gtt_sa_bitmap);
}

static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}
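
/*
 * First-fit allocation: find a free chunk, then grow the range one
 * chunk at a time; if the next free bit is not contiguous, restart
 * the search from there.
 */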
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
	if (!(*mem_obj))
		return -ENOMEM;

	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first chunk that is free */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

	pr_debug("Found = %d\n", found);

	/* If there wasn't any free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Update fields of mem_obj */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
					kfd->gtt_start_gpu_addr,
					found,
					kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
					kfd->gtt_start_cpu_ptr,
					found,
					kfd->gtt_sa_chunk_size);

	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If we need only one chunk, mark it as allocated and get out */
	if (size <= kfd->gtt_sa_chunk_size) {
		pr_debug("Single bit\n");
		set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise, try to see if we have enough contiguous chunks */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/*
		 * If the next free chunk is not contiguous then we need to
		 * restart our search from the last free chunk we found (which
		 * wasn't contiguous to the previous ones)
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/*
		 * If we reached end of buffer, bail out with error
		 */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Check if we don't need another chunk */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

	pr_debug("range_start = %d, range_end = %d\n",
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Mark the chunks as allocated */
	for (found = (*mem_obj)->range_start;
		found <= (*mem_obj)->range_end;
		found++)
		set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
	mutex_unlock(&kfd->gtt_sa_lock);
	kfree(*mem_obj);
	return -ENOMEM;
}

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
	unsigned int bit;

	/* Act like kfree when trying to free a NULL object */
	if (!mem_obj)
		return 0;

	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
			mem_obj, mem_obj->range_start, mem_obj->range_end);

	mutex_lock(&kfd->gtt_sa_lock);

	/* Mark the chunks as free */
	for (bit = mem_obj->range_start;
		bit <= mem_obj->range_end;
		bit++)
		clear_bit(bit, kfd->gtt_sa_bitmap);

	mutex_unlock(&kfd->gtt_sa_lock);

	kfree(mem_obj);
	return 0;
}
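
/*
 * Called from amdgpu when an SRAM ECC error is detected; the flag is
 * cleared again after a successful reset in kgd2kfd_post_reset().
 */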
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
	if (kfd)
		atomic_inc(&kfd->sram_ecc_flag);
}
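
/*
 * Track how many processes have compute queues mapped: leave the
 * power-management "compute idle" state on the first user and re-enter
 * it when the last one goes away.
 */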
void kfd_inc_compute_active(struct kfd_dev *kfd)
{
	if (atomic_inc_return(&kfd->compute_profile) == 1)
		amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
}

void kfd_dec_compute_active(struct kfd_dev *kfd)
{
	int count = atomic_dec_return(&kfd->compute_profile);

	if (count == 0)
		amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
	WARN_ONCE(count < 0, "Compute profile ref. count error");
}

void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
{
	if (kfd && kfd->init_complete)
		kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}

#if defined(CONFIG_DEBUG_FS)

/* This function will send a packet to HIQ to hang the HWS
 * which will trigger a GPU reset and bring the HWS back to normal state
 */
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
	int r = 0;

	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
		pr_err("HWS is not enabled");
		return -EINVAL;
	}

	r = pm_debugfs_hang_hws(&dev->dqm->packets);
	if (!r)
		r = dqm_debugfs_execute_queues(dev->dqm);

	return r;
}

#endif