drivers/gpu/drm/amd/amdkfd/kfd_process.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/mutex.h>
24 #include <linux/log2.h>
25 #include <linux/sched.h>
26 #include <linux/sched/mm.h>
27 #include <linux/sched/task.h>
28 #include <linux/mmu_context.h>
29 #include <linux/slab.h>
30 #include <linux/amd-iommu.h>
31 #include <linux/notifier.h>
32 #include <linux/compat.h>
33 #include <linux/mman.h>
34 #include <linux/file.h>
35 #include <linux/pm_runtime.h>
36 #include "amdgpu_amdkfd.h"
37 #include "amdgpu.h"
38 #include "kfd_svm.h"
39
40 struct mm_struct;
41
42 #include "kfd_priv.h"
43 #include "kfd_device_queue_manager.h"
44 #include "kfd_dbgmgr.h"
45 #include "kfd_iommu.h"
46 #include "kfd_svm.h"
47
48 /*
49  * Hash table of struct kfd_process entries (hash node: field kfd_processes).
50  * Unique/indexed by mm_struct *.
51  */
52 DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
53 static DEFINE_MUTEX(kfd_processes_mutex);
54
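/* SRCU protects lookups in kfd_processes_table against concurrent removal */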
55 DEFINE_SRCU(kfd_processes_srcu);
56
57 /* For process termination handling */
58 static struct workqueue_struct *kfd_process_wq;
59
60 /* Ordered, single-threaded workqueue for restoring evicted
61  * processes. Restoring multiple processes concurrently under memory
62  * pressure can lead to processes blocking each other from validating
63  * their BOs and result in a live-lock situation where processes
64  * remain evicted indefinitely.
65  */
66 static struct workqueue_struct *kfd_restore_wq;
67
68 static struct kfd_process *find_process(const struct task_struct *thread);
69 static void kfd_process_ref_release(struct kref *ref);
70 static struct kfd_process *create_process(const struct task_struct *thread);
71 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
72
73 static void evict_process_worker(struct work_struct *work);
74 static void restore_process_worker(struct work_struct *work);
75
76 struct kfd_procfs_tree {
77         struct kobject *kobj;
78 };
79
80 static struct kfd_procfs_tree procfs;
81
82 /*
83  * Structure for SDMA activity tracking
84  */
85 struct kfd_sdma_activity_handler_workarea {
86         struct work_struct sdma_activity_work;
87         struct kfd_process_device *pdd;
88         uint64_t sdma_activity_counter;
89 };
90
91 struct temp_sdma_queue_list {
92         uint64_t __user *rptr;
93         uint64_t sdma_val;
94         unsigned int queue_id;
95         struct list_head list;
96 };
97
98 static void kfd_sdma_activity_worker(struct work_struct *work)
99 {
100         struct kfd_sdma_activity_handler_workarea *workarea;
101         struct kfd_process_device *pdd;
102         uint64_t val;
103         struct mm_struct *mm;
104         struct queue *q;
105         struct qcm_process_device *qpd;
106         struct device_queue_manager *dqm;
107         int ret = 0;
108         struct temp_sdma_queue_list sdma_q_list;
109         struct temp_sdma_queue_list *sdma_q, *next;
110
111         workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
112                                 sdma_activity_work);
113         if (!workarea)
114                 return;
115
116         pdd = workarea->pdd;
117         if (!pdd)
118                 return;
119         dqm = pdd->dev->dqm;
120         qpd = &pdd->qpd;
121         if (!dqm || !qpd)
122                 return;
123         /*
124          * Total SDMA activity is current SDMA activity + past SDMA activity
125          * Past SDMA count is stored in pdd.
126          * To get the current activity counters for all active SDMA queues,
127          * we loop over all SDMA queues and get their counts from user-space.
128          *
129          * We cannot call get_user() with dqm_lock held as it can cause
130          * a circular lock dependency situation. To read the SDMA stats,
131          * we need to do the following:
132          *
133          * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
134          *    with dqm_lock/dqm_unlock().
135          * 2. Call get_user() for each node in temporary list without dqm_lock.
136          *    Save the SDMA count for each node and also add the count to the total
137          *    SDMA count.
138          *    It's possible that, during this step, a few SDMA queue nodes were deleted
139          *    from the qpd->queues_list.
140          * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
141          *    If any node got deleted, its SDMA count would be captured in the sdma
142          *    past activity counter. So subtract the SDMA counter stored in step 2
143          *    for this node from the total SDMA count.
144          */
145         INIT_LIST_HEAD(&sdma_q_list.list);
146
147         /*
148          * Create the temp list of all SDMA queues
149          */
150         dqm_lock(dqm);
151
152         list_for_each_entry(q, &qpd->queues_list, list) {
153                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
154                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
155                         continue;
156
157                 sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
158                 if (!sdma_q) {
159                         dqm_unlock(dqm);
160                         goto cleanup;
161                 }
162
163                 INIT_LIST_HEAD(&sdma_q->list);
164                 sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
165                 sdma_q->queue_id = q->properties.queue_id;
166                 list_add_tail(&sdma_q->list, &sdma_q_list.list);
167         }
168
169         /*
170          * If the temp list is empty, then no SDMA queue nodes were found in
171          * qpd->queues_list. Return the past activity count as the total SDMA
172          * count.
173          */
174         if (list_empty(&sdma_q_list.list)) {
175                 workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
176                 dqm_unlock(dqm);
177                 return;
178         }
179
180         dqm_unlock(dqm);
181
182         /*
183          * Get the usage count for each SDMA queue in temp_list.
184          */
185         mm = get_task_mm(pdd->process->lead_thread);
186         if (!mm)
187                 goto cleanup;
188
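        /* Adopt the lead thread's mm so get_user() on the queue read pointers works in this worker thread */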
189         kthread_use_mm(mm);
190
191         list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
192                 val = 0;
193                 ret = read_sdma_queue_counter(sdma_q->rptr, &val);
194                 if (ret) {
195                         pr_debug("Failed to read SDMA queue active counter for queue id: %d",
196                                  sdma_q->queue_id);
197                 } else {
198                         sdma_q->sdma_val = val;
199                         workarea->sdma_activity_counter += val;
200                 }
201         }
202
203         kthread_unuse_mm(mm);
204         mmput(mm);
205
206         /*
207          * Do a second iteration over qpd->queues_list to check if any SDMA
208          * queue nodes were deleted while fetching the SDMA counters.
209          */
210         dqm_lock(dqm);
211
212         workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
213
214         list_for_each_entry(q, &qpd->queues_list, list) {
215                 if (list_empty(&sdma_q_list.list))
216                         break;
217
218                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
219                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
220                         continue;
221
222                 list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
223                         if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
224                              (sdma_q->queue_id == q->properties.queue_id)) {
225                                 list_del(&sdma_q->list);
226                                 kfree(sdma_q);
227                                 break;
228                         }
229                 }
230         }
231
232         dqm_unlock(dqm);
233
234         /*
235          * If temp list is not empty, it implies some queues got deleted
236          * from qpd->queues_list during SDMA usage read. Subtract the SDMA
237          * count for each node from the total SDMA count.
238          */
239         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
240                 workarea->sdma_activity_counter -= sdma_q->sdma_val;
241                 list_del(&sdma_q->list);
242                 kfree(sdma_q);
243         }
244
245         return;
246
247 cleanup:
248         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
249                 list_del(&sdma_q->list);
250                 kfree(sdma_q);
251         }
252 }
253
254 /**
255  * kfd_get_cu_occupancy - Collect number of waves in flight on this device
256  * by the current process. Translates the acquired wave count into the number
257  * of compute units that are occupied.
258  *
259  * @attr: Handle of the attribute that allows reporting of wave count. The
260  * attribute handle encapsulates the GPU device it is associated with, thereby
261  * allowing collection of waves in flight, etc.
262  *
263  * @buffer: Handle of user provided buffer updated with wave count
264  *
265  * Return: Number of bytes written to user buffer or an error value
266  */
267 static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
268 {
269         int cu_cnt;
270         int wave_cnt;
271         int max_waves_per_cu;
272         struct kfd_dev *dev = NULL;
273         struct kfd_process *proc = NULL;
274         struct kfd_process_device *pdd = NULL;
275
276         pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
277         dev = pdd->dev;
278         if (dev->kfd2kgd->get_cu_occupancy == NULL)
279                 return -EINVAL;
280
281         cu_cnt = 0;
282         proc = pdd->process;
283         if (pdd->qpd.queue_count == 0) {
284                 pr_debug("Gpu-Id: %d has no active queues for process %d\n",
285                          dev->id, proc->pasid);
286                 return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
287         }
288
289         /* Collect wave count from the device if it supports it */
290         wave_cnt = 0;
291         max_waves_per_cu = 0;
292         dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
293                         &max_waves_per_cu);
294
295         /* Translate wave count to number of compute units */
296         cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
297         return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
298 }
299
300 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
301                                char *buffer)
302 {
303         if (strcmp(attr->name, "pasid") == 0) {
304                 struct kfd_process *p = container_of(attr, struct kfd_process,
305                                                      attr_pasid);
306
307                 return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
308         } else if (strncmp(attr->name, "vram_", 5) == 0) {
309                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
310                                                               attr_vram);
311                 return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
312         } else if (strncmp(attr->name, "sdma_", 5) == 0) {
313                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
314                                                               attr_sdma);
315                 struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
316
317                 INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
318                                         kfd_sdma_activity_worker);
319
320                 sdma_activity_work_handler.pdd = pdd;
321                 sdma_activity_work_handler.sdma_activity_counter = 0;
322
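                /* Collect synchronously: queue the worker, then wait for it below before reporting the counter */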
323                 schedule_work(&sdma_activity_work_handler.sdma_activity_work);
324
325                 flush_work(&sdma_activity_work_handler.sdma_activity_work);
326
327                 return snprintf(buffer, PAGE_SIZE, "%llu\n",
328                                 (sdma_activity_work_handler.sdma_activity_counter)/
329                                  SDMA_ACTIVITY_DIVISOR);
330         } else {
331                 pr_err("Invalid attribute");
332                 return -EINVAL;
333         }
334
335         return 0;
336 }
337
338 static void kfd_procfs_kobj_release(struct kobject *kobj)
339 {
340         kfree(kobj);
341 }
342
343 static const struct sysfs_ops kfd_procfs_ops = {
344         .show = kfd_procfs_show,
345 };
346
347 static struct kobj_type procfs_type = {
348         .release = kfd_procfs_kobj_release,
349         .sysfs_ops = &kfd_procfs_ops,
350 };
351
352 void kfd_procfs_init(void)
353 {
354         int ret = 0;
355
356         procfs.kobj = kfd_alloc_struct(procfs.kobj);
357         if (!procfs.kobj)
358                 return;
359
360         ret = kobject_init_and_add(procfs.kobj, &procfs_type,
361                                    &kfd_device->kobj, "proc");
362         if (ret) {
363                 pr_warn("Could not create procfs proc folder");
364                 /* If we fail to create the procfs, clean up */
365                 kfd_procfs_shutdown();
366         }
367 }
368
369 void kfd_procfs_shutdown(void)
370 {
371         if (procfs.kobj) {
372                 kobject_del(procfs.kobj);
373                 kobject_put(procfs.kobj);
374                 procfs.kobj = NULL;
375         }
376 }
377
378 static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
379                                      struct attribute *attr, char *buffer)
380 {
381         struct queue *q = container_of(kobj, struct queue, kobj);
382
383         if (!strcmp(attr->name, "size"))
384                 return snprintf(buffer, PAGE_SIZE, "%llu",
385                                 q->properties.queue_size);
386         else if (!strcmp(attr->name, "type"))
387                 return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
388         else if (!strcmp(attr->name, "gpuid"))
389                 return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
390         else
391                 pr_err("Invalid attribute");
392
393         return 0;
394 }
395
396 static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
397                                      struct attribute *attr, char *buffer)
398 {
399         if (strcmp(attr->name, "evicted_ms") == 0) {
400                 struct kfd_process_device *pdd = container_of(attr,
401                                 struct kfd_process_device,
402                                 attr_evict);
403                 uint64_t evict_jiffies;
404
405                 evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
406
407                 return snprintf(buffer,
408                                 PAGE_SIZE,
409                                 "%llu\n",
410                                 jiffies64_to_msecs(evict_jiffies));
411
412         /* Sysfs handle that gets CU occupancy is per device */
413         } else if (strcmp(attr->name, "cu_occupancy") == 0) {
414                 return kfd_get_cu_occupancy(attr, buffer);
415         } else {
416                 pr_err("Invalid attribute");
417         }
418
419         return 0;
420 }
421
422 static struct attribute attr_queue_size = {
423         .name = "size",
424         .mode = KFD_SYSFS_FILE_MODE
425 };
426
427 static struct attribute attr_queue_type = {
428         .name = "type",
429         .mode = KFD_SYSFS_FILE_MODE
430 };
431
432 static struct attribute attr_queue_gpuid = {
433         .name = "gpuid",
434         .mode = KFD_SYSFS_FILE_MODE
435 };
436
437 static struct attribute *procfs_queue_attrs[] = {
438         &attr_queue_size,
439         &attr_queue_type,
440         &attr_queue_gpuid,
441         NULL
442 };
443
444 static const struct sysfs_ops procfs_queue_ops = {
445         .show = kfd_procfs_queue_show,
446 };
447
448 static struct kobj_type procfs_queue_type = {
449         .sysfs_ops = &procfs_queue_ops,
450         .default_attrs = procfs_queue_attrs,
451 };
452
453 static const struct sysfs_ops procfs_stats_ops = {
454         .show = kfd_procfs_stats_show,
455 };
456
457 static struct attribute *procfs_stats_attrs[] = {
458         NULL
459 };
460
461 static struct kobj_type procfs_stats_type = {
462         .sysfs_ops = &procfs_stats_ops,
463         .default_attrs = procfs_stats_attrs,
464 };
465
466 int kfd_procfs_add_queue(struct queue *q)
467 {
468         struct kfd_process *proc;
469         int ret;
470
471         if (!q || !q->process)
472                 return -EINVAL;
473         proc = q->process;
474
475         /* Create proc/<pid>/queues/<queue id> folder */
476         if (!proc->kobj_queues)
477                 return -EFAULT;
478         ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
479                         proc->kobj_queues, "%u", q->properties.queue_id);
480         if (ret < 0) {
481                 pr_warn("Creating proc/<pid>/queues/%u failed",
482                         q->properties.queue_id);
483                 kobject_put(&q->kobj);
484                 return ret;
485         }
486
487         return 0;
488 }
489
490 static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
491                                  char *name)
492 {
493         int ret = 0;
494
495         if (!p || !attr || !name)
496                 return -EINVAL;
497
498         attr->name = name;
499         attr->mode = KFD_SYSFS_FILE_MODE;
500         sysfs_attr_init(attr);
501
502         ret = sysfs_create_file(p->kobj, attr);
503
504         return ret;
505 }
506
507 static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
508 {
509         int ret = 0;
510         int i;
511         char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
512
513         if (!p)
514                 return -EINVAL;
515
516         if (!p->kobj)
517                 return -EFAULT;
518
519         /*
520          * Create sysfs files for each GPU:
521          * - proc/<pid>/stats_<gpuid>/
522          * - proc/<pid>/stats_<gpuid>/evicted_ms
523          * - proc/<pid>/stats_<gpuid>/cu_occupancy
524          */
525         for (i = 0; i < p->n_pdds; i++) {
526                 struct kfd_process_device *pdd = p->pdds[i];
527                 struct kobject *kobj_stats;
528
529                 snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
530                                 "stats_%u", pdd->dev->id);
531                 kobj_stats = kfd_alloc_struct(kobj_stats);
532                 if (!kobj_stats)
533                         return -ENOMEM;
534
535                 ret = kobject_init_and_add(kobj_stats,
536                                                 &procfs_stats_type,
537                                                 p->kobj,
538                                                 stats_dir_filename);
539
540                 if (ret) {
541                         pr_warn("Creating KFD proc/stats_%s folder failed",
542                                         stats_dir_filename);
543                         kobject_put(kobj_stats);
544                         goto err;
545                 }
546
547                 pdd->kobj_stats = kobj_stats;
548                 pdd->attr_evict.name = "evicted_ms";
549                 pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
550                 sysfs_attr_init(&pdd->attr_evict);
551                 ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
552                 if (ret)
553                         pr_warn("Creating eviction stats for gpuid %d failed",
554                                         (int)pdd->dev->id);
555
556                 /* Add sysfs file to report compute unit occupancy */
557                 if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
558                         pdd->attr_cu_occupancy.name = "cu_occupancy";
559                         pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
560                         sysfs_attr_init(&pdd->attr_cu_occupancy);
561                         ret = sysfs_create_file(kobj_stats,
562                                                 &pdd->attr_cu_occupancy);
563                         if (ret)
564                                 pr_warn("Creating %s failed for gpuid: %d",
565                                         pdd->attr_cu_occupancy.name,
566                                         (int)pdd->dev->id);
567                 }
568         }
569 err:
570         return ret;
571 }
572
573
574 static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
575 {
576         int ret = 0;
577         int i;
578
579         if (!p)
580                 return -EINVAL;
581
582         if (!p->kobj)
583                 return -EFAULT;
584
585         /*
586          * Create sysfs files for each GPU:
587          * - proc/<pid>/vram_<gpuid>
588          * - proc/<pid>/sdma_<gpuid>
589          */
590         for (i = 0; i < p->n_pdds; i++) {
591                 struct kfd_process_device *pdd = p->pdds[i];
592
593                 snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
594                          pdd->dev->id);
595                 ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
596                 if (ret)
597                         pr_warn("Creating vram usage for gpu id %d failed",
598                                 (int)pdd->dev->id);
599
600                 snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
601                          pdd->dev->id);
602                 ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
603                 if (ret)
604                         pr_warn("Creating sdma usage for gpu id %d failed",
605                                 (int)pdd->dev->id);
606         }
607
608         return ret;
609 }
610
611 void kfd_procfs_del_queue(struct queue *q)
612 {
613         if (!q)
614                 return;
615
616         kobject_del(&q->kobj);
617         kobject_put(&q->kobj);
618 }
619
620 int kfd_process_create_wq(void)
621 {
622         if (!kfd_process_wq)
623                 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
624         if (!kfd_restore_wq)
625                 kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
626
627         if (!kfd_process_wq || !kfd_restore_wq) {
628                 kfd_process_destroy_wq();
629                 return -ENOMEM;
630         }
631
632         return 0;
633 }
634
635 void kfd_process_destroy_wq(void)
636 {
637         if (kfd_process_wq) {
638                 destroy_workqueue(kfd_process_wq);
639                 kfd_process_wq = NULL;
640         }
641         if (kfd_restore_wq) {
642                 destroy_workqueue(kfd_restore_wq);
643                 kfd_restore_wq = NULL;
644         }
645 }
646
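/* Unmap @mem from the GPU VM of @pdd and free it */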
647 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
648                         struct kfd_process_device *pdd)
649 {
650         struct kfd_dev *dev = pdd->dev;
651
652         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
653         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
654                                                NULL);
655 }
656
657 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
658  *      This function should only be called right after the process
659  *      is created and when kfd_processes_mutex is still being held
660  *      to avoid concurrency. Because of that exclusiveness, we do
661  *      not need to take p->mutex.
662  */
663 static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
664                                    uint64_t gpu_va, uint32_t size,
665                                    uint32_t flags, void **kptr)
666 {
667         struct kfd_dev *kdev = pdd->dev;
668         struct kgd_mem *mem = NULL;
669         int handle;
670         int err;
671
672         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
673                                                  pdd->drm_priv, &mem, NULL, flags);
674         if (err)
675                 goto err_alloc_mem;
676
677         err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
678         if (err)
679                 goto err_map_mem;
680
681         err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
682         if (err) {
683                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
684                 goto sync_memory_failed;
685         }
686
687         /* Create an obj handle so kfd_process_device_remove_obj_handle
688          * will take care of the bo removal when the process finishes.
689          * We do not need to take p->mutex, because the process is just
690          * created and the ioctls have not had the chance to run.
691          */
692         handle = kfd_process_device_create_obj_handle(pdd, mem);
693
694         if (handle < 0) {
695                 err = handle;
696                 goto free_gpuvm;
697         }
698
699         if (kptr) {
700                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
701                                 (struct kgd_mem *)mem, kptr, NULL);
702                 if (err) {
703                         pr_debug("Map GTT BO to kernel failed\n");
704                         goto free_obj_handle;
705                 }
706         }
707
708         return err;
709
710 free_obj_handle:
711         kfd_process_device_remove_obj_handle(pdd, handle);
712 free_gpuvm:
713 sync_memory_failed:
714         kfd_process_free_gpuvm(mem, pdd);
715         return err;
716
717 err_map_mem:
718         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
719                                                NULL);
720 err_alloc_mem:
721         *kptr = NULL;
722         return err;
723 }
724
725 /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
726  *      process for IB usage. The memory reserved is for KFD to submit
727  *      IBs to AMDGPU from the kernel. If the memory is reserved
728  *      successfully, ib_kaddr will have the CPU/kernel
729  *      address. Check ib_kaddr before accessing the memory.
730  */
731 static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
732 {
733         struct qcm_process_device *qpd = &pdd->qpd;
734         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
735                         KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
736                         KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
737                         KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
738         void *kaddr;
739         int ret;
740
741         if (qpd->ib_kaddr || !qpd->ib_base)
742                 return 0;
743
744         /* ib_base is only set for dGPU */
745         ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
746                                       &kaddr);
747         if (ret)
748                 return ret;
749
750         qpd->ib_kaddr = kaddr;
751
752         return 0;
753 }
754
755 struct kfd_process *kfd_create_process(struct file *filep)
756 {
757         struct kfd_process *process;
758         struct task_struct *thread = current;
759         int ret;
760
761         if (!thread->mm)
762                 return ERR_PTR(-EINVAL);
763
764         /* Only the pthreads threading model is supported. */
765         if (thread->group_leader->mm != thread->mm)
766                 return ERR_PTR(-EINVAL);
767
768         /*
769          * take the kfd_processes_mutex before starting process creation,
770          * so there won't be a case where two threads of the same process
771          * create two kfd_process structures
772          */
773         mutex_lock(&kfd_processes_mutex);
774
775         /* A prior open of /dev/kfd could have already created the process. */
776         process = find_process(thread);
777         if (process) {
778                 pr_debug("Process already found\n");
779         } else {
780                 process = create_process(thread);
781                 if (IS_ERR(process))
782                         goto out;
783
784                 ret = kfd_process_init_cwsr_apu(process, filep);
785                 if (ret)
786                         goto out_destroy;
787
788                 if (!procfs.kobj)
789                         goto out;
790
791                 process->kobj = kfd_alloc_struct(process->kobj);
792                 if (!process->kobj) {
793                         pr_warn("Creating procfs kobject failed");
794                         goto out;
795                 }
796                 ret = kobject_init_and_add(process->kobj, &procfs_type,
797                                            procfs.kobj, "%d",
798                                            (int)process->lead_thread->pid);
799                 if (ret) {
800                         pr_warn("Creating procfs pid directory failed");
801                         kobject_put(process->kobj);
802                         goto out;
803                 }
804
805                 process->attr_pasid.name = "pasid";
806                 process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
807                 sysfs_attr_init(&process->attr_pasid);
808                 ret = sysfs_create_file(process->kobj, &process->attr_pasid);
809                 if (ret)
810                         pr_warn("Creating pasid for pid %d failed",
811                                         (int)process->lead_thread->pid);
812
813                 process->kobj_queues = kobject_create_and_add("queues",
814                                                         process->kobj);
815                 if (!process->kobj_queues)
816                         pr_warn("Creating KFD proc/queues folder failed");
817
818                 ret = kfd_procfs_add_sysfs_stats(process);
819                 if (ret)
820                         pr_warn("Creating sysfs stats dir for pid %d failed",
821                                 (int)process->lead_thread->pid);
822
823                 ret = kfd_procfs_add_sysfs_files(process);
824                 if (ret)
825                         pr_warn("Creating sysfs usage file for pid %d failed",
826                                 (int)process->lead_thread->pid);
827         }
828 out:
829         if (!IS_ERR(process))
830                 kref_get(&process->ref);
831         mutex_unlock(&kfd_processes_mutex);
832
833         return process;
834
835 out_destroy:
836         hash_del_rcu(&process->kfd_processes);
837         mutex_unlock(&kfd_processes_mutex);
838         synchronize_srcu(&kfd_processes_srcu);
839         /* kfd_process_free_notifier will trigger the cleanup */
840         mmu_notifier_put(&process->mmu_notifier);
841         return ERR_PTR(ret);
842 }
843
844 struct kfd_process *kfd_get_process(const struct task_struct *thread)
845 {
846         struct kfd_process *process;
847
848         if (!thread->mm)
849                 return ERR_PTR(-EINVAL);
850
851         /* Only the pthreads threading model is supported. */
852         if (thread->group_leader->mm != thread->mm)
853                 return ERR_PTR(-EINVAL);
854
855         process = find_process(thread);
856         if (!process)
857                 return ERR_PTR(-EINVAL);
858
859         return process;
860 }
861
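/* Look up a process by its mm_struct; called with kfd_processes_srcu read-locked */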
862 static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
863 {
864         struct kfd_process *process;
865
866         hash_for_each_possible_rcu(kfd_processes_table, process,
867                                         kfd_processes, (uintptr_t)mm)
868                 if (process->mm == mm)
869                         return process;
870
871         return NULL;
872 }
873
874 static struct kfd_process *find_process(const struct task_struct *thread)
875 {
876         struct kfd_process *p;
877         int idx;
878
879         idx = srcu_read_lock(&kfd_processes_srcu);
880         p = find_process_by_mm(thread->mm);
881         srcu_read_unlock(&kfd_processes_srcu, idx);
882
883         return p;
884 }
885
886 void kfd_unref_process(struct kfd_process *p)
887 {
888         kref_put(&p->ref, kfd_process_ref_release);
889 }
890
891
892 static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
893 {
894         struct kfd_process *p = pdd->process;
895         void *mem;
896         int id;
897         int i;
898
899         /*
900          * Remove all handles from idr and release appropriate
901          * local memory object
902          */
903         idr_for_each_entry(&pdd->alloc_idr, mem, id) {
904
905                 for (i = 0; i < p->n_pdds; i++) {
906                         struct kfd_process_device *peer_pdd = p->pdds[i];
907
908                         if (!peer_pdd->drm_priv)
909                                 continue;
910                         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
911                                 peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
912                 }
913
914                 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
915                                                        pdd->drm_priv, NULL);
916                 kfd_process_device_remove_obj_handle(pdd, id);
917         }
918 }
919
920 static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
921 {
922         int i;
923
924         for (i = 0; i < p->n_pdds; i++)
925                 kfd_process_device_free_bos(p->pdds[i]);
926 }
927
928 static void kfd_process_destroy_pdds(struct kfd_process *p)
929 {
930         int i;
931
932         for (i = 0; i < p->n_pdds; i++) {
933                 struct kfd_process_device *pdd = p->pdds[i];
934
935                 pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
936                                 pdd->dev->id, p->pasid);
937
938                 if (pdd->drm_file) {
939                         amdgpu_amdkfd_gpuvm_release_process_vm(
940                                         pdd->dev->kgd, pdd->drm_priv);
941                         fput(pdd->drm_file);
942                 }
943
944                 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
945                         free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
946                                 get_order(KFD_CWSR_TBA_TMA_SIZE));
947
948                 kfree(pdd->qpd.doorbell_bitmap);
949                 idr_destroy(&pdd->alloc_idr);
950
951                 kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
952
953                 /*
954                  * before destroying pdd, make sure to report availability
955                  * for auto suspend
956                  */
957                 if (pdd->runtime_inuse) {
958                         pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
959                         pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
960                         pdd->runtime_inuse = false;
961                 }
962
963                 kfree(pdd);
964                 p->pdds[i] = NULL;
965         }
966         p->n_pdds = 0;
967 }
968
969 /* No process locking is needed in this function, because the process
970  * is not findable any more. We must assume that no other thread is
971  * using it any more, otherwise we couldn't safely free the process
972  * structure in the end.
973  */
974 static void kfd_process_wq_release(struct work_struct *work)
975 {
976         struct kfd_process *p = container_of(work, struct kfd_process,
977                                              release_work);
978         int i;
979
980         /* Remove the procfs files */
981         if (p->kobj) {
982                 sysfs_remove_file(p->kobj, &p->attr_pasid);
983                 kobject_del(p->kobj_queues);
984                 kobject_put(p->kobj_queues);
985                 p->kobj_queues = NULL;
986
987                 for (i = 0; i < p->n_pdds; i++) {
988                         struct kfd_process_device *pdd = p->pdds[i];
989
990                         sysfs_remove_file(p->kobj, &pdd->attr_vram);
991                         sysfs_remove_file(p->kobj, &pdd->attr_sdma);
992                         sysfs_remove_file(p->kobj, &pdd->attr_evict);
993                         if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
994                                 sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
995                         kobject_del(pdd->kobj_stats);
996                         kobject_put(pdd->kobj_stats);
997                         pdd->kobj_stats = NULL;
998                 }
999
1000                 kobject_del(p->kobj);
1001                 kobject_put(p->kobj);
1002                 p->kobj = NULL;
1003         }
1004
1005         kfd_iommu_unbind_process(p);
1006
1007         kfd_process_free_outstanding_kfd_bos(p);
1008         svm_range_list_fini(p);
1009
1010         kfd_process_destroy_pdds(p);
1011         dma_fence_put(p->ef);
1012
1013         kfd_event_free_process(p);
1014
1015         kfd_pasid_free(p->pasid);
1016         mutex_destroy(&p->mutex);
1017
1018         put_task_struct(p->lead_thread);
1019
1020         kfree(p);
1021 }
1022
1023 static void kfd_process_ref_release(struct kref *ref)
1024 {
1025         struct kfd_process *p = container_of(ref, struct kfd_process, ref);
1026
1027         INIT_WORK(&p->release_work, kfd_process_wq_release);
1028         queue_work(kfd_process_wq, &p->release_work);
1029 }
1030
1031 static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
1032 {
1033         int idx = srcu_read_lock(&kfd_processes_srcu);
1034         struct kfd_process *p = find_process_by_mm(mm);
1035
1036         srcu_read_unlock(&kfd_processes_srcu, idx);
1037
1038         return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
1039 }
1040
1041 static void kfd_process_free_notifier(struct mmu_notifier *mn)
1042 {
1043         kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
1044 }
1045
1046 static void kfd_process_notifier_release(struct mmu_notifier *mn,
1047                                         struct mm_struct *mm)
1048 {
1049         struct kfd_process *p;
1050         int i;
1051
1052         /*
1053          * The kfd_process structure cannot be freed because the
1054          * mmu_notifier SRCU is read-locked
1055          */
1056         p = container_of(mn, struct kfd_process, mmu_notifier);
1057         if (WARN_ON(p->mm != mm))
1058                 return;
1059
1060         mutex_lock(&kfd_processes_mutex);
1061         hash_del_rcu(&p->kfd_processes);
1062         mutex_unlock(&kfd_processes_mutex);
1063         synchronize_srcu(&kfd_processes_srcu);
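        /* From this point no SRCU reader can look up this process any more */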
1064
1065         cancel_delayed_work_sync(&p->eviction_work);
1066         cancel_delayed_work_sync(&p->restore_work);
1067         cancel_delayed_work_sync(&p->svms.restore_work);
1068
1069         mutex_lock(&p->mutex);
1070
1071         /* Iterate over all process device data structures and if the
1072          * pdd is in debug mode, we should first force unregistration,
1073          * then we will be able to destroy the queues
1074          */
1075         for (i = 0; i < p->n_pdds; i++) {
1076                 struct kfd_dev *dev = p->pdds[i]->dev;
1077
1078                 mutex_lock(kfd_get_dbgmgr_mutex());
1079                 if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
1080                         if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
1081                                 kfd_dbgmgr_destroy(dev->dbgmgr);
1082                                 dev->dbgmgr = NULL;
1083                         }
1084                 }
1085                 mutex_unlock(kfd_get_dbgmgr_mutex());
1086         }
1087
1088         kfd_process_dequeue_from_all_devices(p);
1089         pqm_uninit(&p->pqm);
1090
1091         /* Indicate to other users that MM is no longer valid */
1092         p->mm = NULL;
1093         /* Signal the eviction fence after user mode queues are
1094          * destroyed. This allows any BOs to be freed without
1095          * triggering pointless evictions or waiting for fences.
1096          */
1097         dma_fence_signal(p->ef);
1098
1099         mutex_unlock(&p->mutex);
1100
1101         mmu_notifier_put(&p->mmu_notifier);
1102 }
1103
1104 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
1105         .release = kfd_process_notifier_release,
1106         .alloc_notifier = kfd_process_alloc_notifier,
1107         .free_notifier = kfd_process_free_notifier,
1108 };
1109
1110 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
1111 {
1112         unsigned long  offset;
1113         int i;
1114
1115         for (i = 0; i < p->n_pdds; i++) {
1116                 struct kfd_dev *dev = p->pdds[i]->dev;
1117                 struct qcm_process_device *qpd = &p->pdds[i]->qpd;
1118
1119                 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
1120                         continue;
1121
1122                 offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
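                /* Encode the mmap type and GPU id into the offset; the KFD mmap handler uses them to map this device's CWSR reserved memory */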
1123                 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
1124                         KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
1125                         MAP_SHARED, offset);
1126
1127                 if (IS_ERR_VALUE(qpd->tba_addr)) {
1128                         int err = qpd->tba_addr;
1129
1130                         pr_err("Failure to set tba address. error %d.\n", err);
1131                         qpd->tba_addr = 0;
1132                         qpd->cwsr_kaddr = NULL;
1133                         return err;
1134                 }
1135
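                /* Copy the CWSR trap handler ISA into the newly mapped TBA page */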
1136                 memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1137
1138                 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1139                 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1140                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1141         }
1142
1143         return 0;
1144 }
1145
1146 static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
1147 {
1148         struct kfd_dev *dev = pdd->dev;
1149         struct qcm_process_device *qpd = &pdd->qpd;
1150         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
1151                         | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
1152                         | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1153         void *kaddr;
1154         int ret;
1155
1156         if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
1157                 return 0;
1158
1159         /* cwsr_base is only set for dGPU */
1160         ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
1161                                       KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
1162         if (ret)
1163                 return ret;
1164
1165         qpd->cwsr_kaddr = kaddr;
1166         qpd->tba_addr = qpd->cwsr_base;
1167
1168         memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1169
1170         qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1171         pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1172                  qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1173
1174         return 0;
1175 }
1176
1177 void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
1178                                   uint64_t tba_addr,
1179                                   uint64_t tma_addr)
1180 {
1181         if (qpd->cwsr_kaddr) {
1182                 /* KFD trap handler is bound, record as second-level TBA/TMA
1183                  * in first-level TMA. First-level trap will jump to second.
1184                  */
1185                 uint64_t *tma =
1186                         (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1187                 tma[0] = tba_addr;
1188                 tma[1] = tma_addr;
1189         } else {
1190                 /* No trap handler bound, bind as first-level TBA/TMA. */
1191                 qpd->tba_addr = tba_addr;
1192                 qpd->tma_addr = tma_addr;
1193         }
1194 }
1195
1196 bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
1197 {
1198         int i;
1199
1200         /* On most GFXv9 GPUs, the retry mode in the SQ must match the
1201          * boot time retry setting. Mixing processes with different
1202          * XNACK/retry settings can hang the GPU.
1203          *
1204          * Different GPUs can have different noretry settings depending
1205          * on HW bugs or limitations. We need to find at least one
1206          * XNACK mode for this process that's compatible with all GPUs.
1207          * Fortunately GPUs with retry enabled (noretry=0) can run code
1208          * built for XNACK-off. On GFXv9 it may perform slower.
1209          *
1210          * Therefore applications built for XNACK-off can always be
1211          * supported and will be our fallback if any GPU does not
1212          * support retry.
1213          */
1214         for (i = 0; i < p->n_pdds; i++) {
1215                 struct kfd_dev *dev = p->pdds[i]->dev;
1216
1217                 /* Only consider GFXv9 and higher GPUs. Older GPUs don't
1218                  * support the SVM APIs and don't need to be considered
1219                  * for the XNACK mode selection.
1220                  */
1221                 if (dev->device_info->asic_family < CHIP_VEGA10)
1222                         continue;
1223                 /* Aldebaran can always support XNACK because it can support
1224                  * per-process XNACK mode selection. But let the dev->noretry
1225                  * setting still influence the default XNACK mode.
1226                  */
1227                 if (supported &&
1228                     dev->device_info->asic_family == CHIP_ALDEBARAN)
1229                         continue;
1230
1231                 /* GFXv10 and later GPUs do not support shader preemption
1232                  * during page faults. This can lead to poor QoS for queue
1233                  * management and memory-manager-related preemptions or
1234                  * even deadlocks.
1235                  */
1236                 if (dev->device_info->asic_family >= CHIP_NAVI10)
1237                         return false;
1238
1239                 if (dev->noretry)
1240                         return false;
1241         }
1242
1243         return true;
1244 }
1245
1246 /*
1247  * On return the kfd_process is fully operational and will be freed when the
1248  * mm is released
1249  */
1250 static struct kfd_process *create_process(const struct task_struct *thread)
1251 {
1252         struct kfd_process *process;
1253         struct mmu_notifier *mn;
1254         int err = -ENOMEM;
1255
1256         process = kzalloc(sizeof(*process), GFP_KERNEL);
1257         if (!process)
1258                 goto err_alloc_process;
1259
1260         kref_init(&process->ref);
1261         mutex_init(&process->mutex);
1262         process->mm = thread->mm;
1263         process->lead_thread = thread->group_leader;
1264         process->n_pdds = 0;
1265         process->svm_disabled = false;
1266         INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
1267         INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
1268         process->last_restore_timestamp = get_jiffies_64();
1269         kfd_event_init_process(process);
1270         process->is_32bit_user_mode = in_compat_syscall();
1271
1272         process->pasid = kfd_pasid_alloc();
1273         if (process->pasid == 0)
1274                 goto err_alloc_pasid;
1275
1276         err = pqm_init(&process->pqm, process);
1277         if (err != 0)
1278                 goto err_process_pqm_init;
1279
1280         /* init process apertures */
1281         err = kfd_init_apertures(process);
1282         if (err != 0)
1283                 goto err_init_apertures;
1284
1285         /* Check XNACK support after PDDs are created in kfd_init_apertures */
1286         process->xnack_enabled = kfd_process_xnack_mode(process, false);
1287
1288         err = svm_range_list_init(process);
1289         if (err)
1290                 goto err_init_svm_range_list;
1291
1292         /* alloc_notifier needs to find the process in the hash table */
1293         hash_add_rcu(kfd_processes_table, &process->kfd_processes,
1294                         (uintptr_t)process->mm);
1295
1296         /* MMU notifier registration must be the last call that can fail
1297          * because after this point we cannot unwind the process creation.
1298          * After this point, mmu_notifier_put will trigger the cleanup by
1299          * dropping the last process reference in the free_notifier.
1300          */
1301         mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
1302         if (IS_ERR(mn)) {
1303                 err = PTR_ERR(mn);
1304                 goto err_register_notifier;
1305         }
1306         BUG_ON(mn != &process->mmu_notifier);
1307
1308         get_task_struct(process->lead_thread);
1309
1310         return process;
1311
1312 err_register_notifier:
1313         hash_del_rcu(&process->kfd_processes);
1314         svm_range_list_fini(process);
1315 err_init_svm_range_list:
1316         kfd_process_free_outstanding_kfd_bos(process);
1317         kfd_process_destroy_pdds(process);
1318 err_init_apertures:
1319         pqm_uninit(&process->pqm);
1320 err_process_pqm_init:
1321         kfd_pasid_free(process->pasid);
1322 err_alloc_pasid:
1323         mutex_destroy(&process->mutex);
1324         kfree(process);
1325 err_alloc_process:
1326         return ERR_PTR(err);
1327 }
1328
1329 static int init_doorbell_bitmap(struct qcm_process_device *qpd,
1330                         struct kfd_dev *dev)
1331 {
1332         unsigned int i;
1333         int range_start = dev->shared_resources.non_cp_doorbells_start;
1334         int range_end = dev->shared_resources.non_cp_doorbells_end;
1335
1336         if (!KFD_IS_SOC15(dev->device_info->asic_family))
1337                 return 0;
1338
1339         qpd->doorbell_bitmap =
1340                 kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
1341                                      BITS_PER_BYTE), GFP_KERNEL);
1342         if (!qpd->doorbell_bitmap)
1343                 return -ENOMEM;
1344
1345         /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
1346         pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
1347         pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
1348                         range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1349                         range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
1350
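        /* Reserve each doorbell in the non-CP range plus its mirrored counterpart at KFD_QUEUE_DOORBELL_MIRROR_OFFSET */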
1351         for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
1352                 if (i >= range_start && i <= range_end) {
1353                         set_bit(i, qpd->doorbell_bitmap);
1354                         set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1355                                 qpd->doorbell_bitmap);
1356                 }
1357         }
1358
1359         return 0;
1360 }
1361
1362 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
1363                                                         struct kfd_process *p)
1364 {
1365         int i;
1366
1367         for (i = 0; i < p->n_pdds; i++)
1368                 if (p->pdds[i]->dev == dev)
1369                         return p->pdds[i];
1370
1371         return NULL;
1372 }
1373
1374 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
1375                                                         struct kfd_process *p)
1376 {
1377         struct kfd_process_device *pdd = NULL;
1378
1379         if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
1380                 return NULL;
1381         pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
1382         if (!pdd)
1383                 return NULL;
1384
1385         if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
1386                 pr_err("Failed to alloc doorbell for pdd\n");
1387                 goto err_free_pdd;
1388         }
1389
1390         if (init_doorbell_bitmap(&pdd->qpd, dev)) {
1391                 pr_err("Failed to init doorbell for process\n");
1392                 goto err_free_pdd;
1393         }
1394
1395         pdd->dev = dev;
1396         INIT_LIST_HEAD(&pdd->qpd.queues_list);
1397         INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
1398         pdd->qpd.dqm = dev->dqm;
1399         pdd->qpd.pqm = &p->pqm;
1400         pdd->qpd.evicted = 0;
1401         pdd->qpd.mapped_gws_queue = false;
1402         pdd->process = p;
1403         pdd->bound = PDD_UNBOUND;
1404         pdd->already_dequeued = false;
1405         pdd->runtime_inuse = false;
1406         pdd->vram_usage = 0;
1407         pdd->sdma_past_activity_counter = 0;
1408         atomic64_set(&pdd->evict_duration_counter, 0);
1409         p->pdds[p->n_pdds++] = pdd;
1410
1411         /* Init idr used for memory handle translation */
1412         idr_init(&pdd->alloc_idr);
1413
1414         return pdd;
1415
1416 err_free_pdd:
1417         kfree(pdd);
1418         return NULL;
1419 }
1420
1421 /**
1422  * kfd_process_device_init_vm - Initialize a VM for a process-device
1423  *
1424  * @pdd: The process-device
1425  * @drm_file: Optional pointer to a DRM file descriptor
1426  *
1427  * If @drm_file is specified, it will be used to acquire the VM from
1428  * that file descriptor. If successful, the @pdd takes ownership of
1429  * the file descriptor.
1430  *
1431  * If @drm_file is NULL, a new VM is created.
1432  *
1433  * Returns 0 on success, -errno on failure.
1434  */
1435 int kfd_process_device_init_vm(struct kfd_process_device *pdd,
1436                                struct file *drm_file)
1437 {
1438         struct kfd_process *p;
1439         struct kfd_dev *dev;
1440         int ret;
1441
1442         if (!drm_file)
1443                 return -EINVAL;
1444
1445         if (pdd->drm_priv)
1446                 return -EBUSY;
1447
1448         p = pdd->process;
1449         dev = pdd->dev;
1450
1451         ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
1452                 dev->kgd, drm_file, p->pasid,
1453                 &p->kgd_process_info, &p->ef);
1454         if (ret) {
1455                 pr_err("Failed to create process VM object\n");
1456                 return ret;
1457         }
1458         pdd->drm_priv = drm_file->private_data;
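        /* The DRM file's driver-private data identifies the acquired VM; keep it as this pdd's VM handle */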
1459
1460         ret = kfd_process_device_reserve_ib_mem(pdd);
1461         if (ret)
1462                 goto err_reserve_ib_mem;
1463         ret = kfd_process_device_init_cwsr_dgpu(pdd);
1464         if (ret)
1465                 goto err_init_cwsr;
1466
1467         pdd->drm_file = drm_file;
1468
1469         return 0;
1470
1471 err_init_cwsr:
1472 err_reserve_ib_mem:
1473         kfd_process_device_free_bos(pdd);
1474         pdd->drm_priv = NULL;
1475
1476         return ret;
1477 }
1478
1479 /*
1480  * Direct the IOMMU to bind the process (specifically the pasid->mm)
1481  * to the device.
1482  * Unbinding occurs when the process dies or the device is removed.
1483  *
1484  * Assumes that the process lock is held.
1485  */
1486 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
1487                                                         struct kfd_process *p)
1488 {
1489         struct kfd_process_device *pdd;
1490         int err;
1491
1492         pdd = kfd_get_process_device_data(dev, p);
1493         if (!pdd) {
1494                 pr_err("Process device data doesn't exist\n");
1495                 return ERR_PTR(-ENOMEM);
1496         }
1497
1498         if (!pdd->drm_priv)
1499                 return ERR_PTR(-ENODEV);
1500
1501         /*
1502          * Signal the runtime-pm framework to auto-resume this device and
1503          * to prevent further runtime suspend for as long as the pdd
1504          * exists, i.e. from pdd creation until pdd destruction.
1505          */
1506         if (!pdd->runtime_inuse) {
1507                 err = pm_runtime_get_sync(dev->ddev->dev);
1508                 if (err < 0) {
1509                         pm_runtime_put_autosuspend(dev->ddev->dev);
1510                         return ERR_PTR(err);
1511                 }
1512         }
1513
1514         err = kfd_iommu_bind_process_to_device(pdd);
1515         if (err)
1516                 goto out;
1517
1518         /*
1519          * Make sure the runtime-pm usage counter is incremented only
1520          * once per pdd.
1521          */
1522         pdd->runtime_inuse = true;
1523
1524         return pdd;
1525
1526 out:
1527         /* balance runpm reference count and exit with error */
1528         if (!pdd->runtime_inuse) {
1529                 pm_runtime_mark_last_busy(dev->ddev->dev);
1530                 pm_runtime_put_autosuspend(dev->ddev->dev);
1531         }
1532
1533         return ERR_PTR(err);
1534 }
1535
1536 /* Create a handle in the process-local memory idr that maps to @mem.
1537  * Assumes that the process lock is held.
1538  */
1539 int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
1540                                         void *mem)
1541 {
1542         return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
1543 }
1544
1545 /* Translate a handle from the process-local memory idr back to its
1546  * memory object. Assumes that the process lock is held.
1547  */
1548 void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
1549                                         int handle)
1550 {
1551         if (handle < 0)
1552                 return NULL;
1553
1554         return idr_find(&pdd->alloc_idr, handle);
1555 }
1556
1557 /* Remove a handle from the process-local memory idr.
1558  * Assumes that the process lock is held.
1559  */
1560 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
1561                                         int handle)
1562 {
1563         if (handle >= 0)
1564                 idr_remove(&pdd->alloc_idr, handle);
1565 }
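
/*
 * Typical handle lifecycle for the three helpers above (an illustrative
 * sketch only; "mem" stands for whatever memory object the caller
 * allocated and is not a variable in this file). A negative return from
 * the create helper is the idr_alloc() error code:
 *
 *	handle = kfd_process_device_create_obj_handle(pdd, mem);
 *	if (handle < 0)
 *		return handle;
 *	...
 *	mem = kfd_process_device_translate_handle(pdd, handle);
 *	...
 *	kfd_process_device_remove_obj_handle(pdd, handle);
 *
 * All three calls assume the caller holds the process lock.
 */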
1566
1567 /* This increments the process->ref counter. */
1568 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
1569 {
1570         struct kfd_process *p, *ret_p = NULL;
1571         unsigned int temp;
1572
1573         int idx = srcu_read_lock(&kfd_processes_srcu);
1574
1575         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1576                 if (p->pasid == pasid) {
1577                         kref_get(&p->ref);
1578                         ret_p = p;
1579                         break;
1580                 }
1581         }
1582
1583         srcu_read_unlock(&kfd_processes_srcu, idx);
1584
1585         return ret_p;
1586 }
1587
1588 /* This increments the process->ref counter. */
1589 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
1590 {
1591         struct kfd_process *p;
1592
1593         int idx = srcu_read_lock(&kfd_processes_srcu);
1594
1595         p = find_process_by_mm(mm);
1596         if (p)
1597                 kref_get(&p->ref);
1598
1599         srcu_read_unlock(&kfd_processes_srcu, idx);
1600
1601         return p;
1602 }
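
/*
 * Both lookup helpers above return a counted reference; callers are
 * expected to drop it with kfd_unref_process() once they are done with
 * the process.
 */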
1603
1604 /* kfd_process_evict_queues - Evict all user queues of a process
1605  *
1606  * Eviction is reference-counted per process-device. This means multiple
1607  * evictions from different sources can be nested safely.
1608  */
1609 int kfd_process_evict_queues(struct kfd_process *p)
1610 {
1611         int r = 0;
1612         int i;
1613         unsigned int n_evicted = 0;
1614
1615         for (i = 0; i < p->n_pdds; i++) {
1616                 struct kfd_process_device *pdd = p->pdds[i];
1617
1618                 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
1619                                                             &pdd->qpd);
1620                 if (r) {
1621                         pr_err("Failed to evict process queues\n");
1622                         goto fail;
1623                 }
1624                 n_evicted++;
1625         }
1626
1627         return r;
1628
1629 fail:
1630         /* To keep state consistent, roll back partial eviction by
1631          * restoring queues
1632          */
1633         for (i = 0; i < p->n_pdds; i++) {
1634                 struct kfd_process_device *pdd = p->pdds[i];
1635
1636                 if (n_evicted == 0)
1637                         break;
1638                 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1639                                                               &pdd->qpd))
1640                         pr_err("Failed to restore queues\n");
1641
1642                 n_evicted--;
1643         }
1644
1645         return r;
1646 }
1647
1648 /* kfd_process_restore_queues - Restore all user queues of a process */
1649 int kfd_process_restore_queues(struct kfd_process *p)
1650 {
1651         int r, ret = 0;
1652         int i;
1653
1654         for (i = 0; i < p->n_pdds; i++) {
1655                 struct kfd_process_device *pdd = p->pdds[i];
1656
1657                 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1658                                                               &pdd->qpd);
1659                 if (r) {
1660                         pr_err("Failed to restore process queues\n");
1661                         if (!ret)
1662                                 ret = r;
1663                 }
1664         }
1665
1666         return ret;
1667 }
1668
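/* Return the index into p->pdds[] of the device whose ID matches @gpu_id,
 * or -EINVAL if this process has no pdd for that GPU.
 */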
1669 int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
1670 {
1671         int i;
1672
1673         for (i = 0; i < p->n_pdds; i++)
1674                 if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
1675                         return i;
1676         return -EINVAL;
1677 }
1678
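/* Look up the GPU ID and pdds[] index of the device identified by the
 * amdgpu_device/kgd handle @adev. Fills in *gpuid and *gpuidx and returns
 * 0 on success, or -EINVAL if the process has no pdd for that device.
 */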
1679 int
1680 kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
1681                            uint32_t *gpuid, uint32_t *gpuidx)
1682 {
1683         struct kgd_dev *kgd = (struct kgd_dev *)adev;
1684         int i;
1685
1686         for (i = 0; i < p->n_pdds; i++)
1687                 if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
1688                         *gpuid = p->pdds[i]->dev->id;
1689                         *gpuidx = i;
1690                         return 0;
1691                 }
1692         return -EINVAL;
1693 }
1694
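/* Delayed-work handler that evicts a process: it waits for any in-flight
 * restore work to finish, evicts all of the process's queues, signals and
 * drops the eviction fence, and schedules restore_process_worker() to run
 * after PROCESS_RESTORE_TIME_MS.
 */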
1695 static void evict_process_worker(struct work_struct *work)
1696 {
1697         int ret;
1698         struct kfd_process *p;
1699         struct delayed_work *dwork;
1700
1701         dwork = to_delayed_work(work);
1702
1703         /* Process termination destroys this work item, so the kfd_process
1704          * p stays valid for the lifetime of this function.
1705          */
1706         p = container_of(dwork, struct kfd_process, eviction_work);
1707         WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
1708                   "Eviction fence mismatch\n");
1709
1710         /* A narrow window of overlap between the restore and evict work
1711          * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
1712          * unreserves the KFD BOs, the process can be evicted again, but
1713          * restore still has a few more steps to finish. So wait for any
1714          * previous restore work to complete first.
1715          */
1716         flush_delayed_work(&p->restore_work);
1717
1718         pr_debug("Started evicting pasid 0x%x\n", p->pasid);
1719         ret = kfd_process_evict_queues(p);
1720         if (!ret) {
1721                 dma_fence_signal(p->ef);
1722                 dma_fence_put(p->ef);
1723                 p->ef = NULL;
1724                 queue_delayed_work(kfd_restore_wq, &p->restore_work,
1725                                 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
1726
1727                 pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
1728         } else
1729                 pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
1730 }
1731
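/* Delayed-work handler that restores an evicted process: it revalidates
 * the process's BOs through amdgpu_amdkfd_gpuvm_restore_process_bos() and
 * then restores the user queues. If BO restoration fails, the work
 * reschedules itself after PROCESS_BACK_OFF_TIME_MS.
 */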
1732 static void restore_process_worker(struct work_struct *work)
1733 {
1734         struct delayed_work *dwork;
1735         struct kfd_process *p;
1736         int ret = 0;
1737
1738         dwork = to_delayed_work(work);
1739
1740         /* Process termination destroys this work item, so the kfd_process
1741          * p stays valid for the lifetime of this function.
1742          */
1743         p = container_of(dwork, struct kfd_process, restore_work);
1744         pr_debug("Started restoring pasid 0x%x\n", p->pasid);
1745
1746         /* Set last_restore_timestamp before the restore has actually
1747          * succeeded. Otherwise it would have to be set by KGD
1748          * (restore_process_bos) before the KFD BOs are unreserved; if not,
1749          * the process could be evicted again before the timestamp is set.
1750          * If the restore fails, the timestamp is simply set again on the
1751          * next attempt, so the minimum GPU quantum a process gets is
1752          * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
1753          * following two functions.
1754          */
1755
1756         p->last_restore_timestamp = get_jiffies_64();
1757         ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
1758                                                      &p->ef);
1759         if (ret) {
1760                 pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
1761                          p->pasid, PROCESS_BACK_OFF_TIME_MS);
1762                 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
1763                                 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
1764                 WARN(!ret, "reschedule restore work failed\n");
1765                 return;
1766         }
1767
1768         ret = kfd_process_restore_queues(p);
1769         if (!ret)
1770                 pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
1771         else
1772                 pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
1773 }
1774
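/* Evict every known process (used on device suspend): cancel any pending
 * eviction/restore work, evict all queues, and signal and release each
 * process's eviction fence. The process table is walked under SRCU.
 */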
1775 void kfd_suspend_all_processes(void)
1776 {
1777         struct kfd_process *p;
1778         unsigned int temp;
1779         int idx = srcu_read_lock(&kfd_processes_srcu);
1780
1781         WARN(debug_evictions, "Evicting all processes");
1782         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1783                 cancel_delayed_work_sync(&p->eviction_work);
1784                 cancel_delayed_work_sync(&p->restore_work);
1785
1786                 if (kfd_process_evict_queues(p))
1787                         pr_err("Failed to suspend process 0x%x\n", p->pasid);
1788                 dma_fence_signal(p->ef);
1789                 dma_fence_put(p->ef);
1790                 p->ef = NULL;
1791         }
1792         srcu_read_unlock(&kfd_processes_srcu, idx);
1793 }
1794
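/* Schedule an immediate restore for every known process, the counterpart
 * of kfd_suspend_all_processes(). Returns -EFAULT if any restore work
 * could not be queued, e.g. because it was already pending.
 */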
1795 int kfd_resume_all_processes(void)
1796 {
1797         struct kfd_process *p;
1798         unsigned int temp;
1799         int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
1800
1801         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1802                 if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
1803                         pr_err("Restore process %d failed during resume\n",
1804                                p->pasid);
1805                         ret = -EFAULT;
1806                 }
1807         }
1808         srcu_read_unlock(&kfd_processes_srcu, idx);
1809         return ret;
1810 }
1811
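/* mmap handler for the per-process CWSR (compute wave save/restore) area:
 * allocates a zeroed buffer of KFD_CWSR_TBA_TMA_SIZE bytes and maps it
 * into the given user VMA. The VMA size must match exactly.
 */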
1812 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
1813                           struct vm_area_struct *vma)
1814 {
1815         struct kfd_process_device *pdd;
1816         struct qcm_process_device *qpd;
1817
1818         if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
1819                 pr_err("Incorrect CWSR mapping size.\n");
1820                 return -EINVAL;
1821         }
1822
1823         pdd = kfd_get_process_device_data(dev, process);
1824         if (!pdd)
1825                 return -EINVAL;
1826         qpd = &pdd->qpd;
1827
1828         qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1829                                         get_order(KFD_CWSR_TBA_TMA_SIZE));
1830         if (!qpd->cwsr_kaddr) {
1831                 pr_err("Error allocating per process CWSR buffer.\n");
1832                 return -ENOMEM;
1833         }
1834
1835         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
1836                 | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
1837         /* Map the CWSR pages into the user process's address space */
1838         return remap_pfn_range(vma, vma->vm_start,
1839                                PFN_DOWN(__pa(qpd->cwsr_kaddr)),
1840                                KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
1841 }
1842
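/* Flush GPU TLB entries for this process-device. Without HWS (hardware
 * scheduling) the flush is done by VMID, and only once a VMID has been
 * assigned (i.e. after the first queue was created); with HWS the flush
 * is done by PASID.
 */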
1843 void kfd_flush_tlb(struct kfd_process_device *pdd)
1844 {
1845         struct kfd_dev *dev = pdd->dev;
1846
1847         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1848                 /* Nothing to flush until a VMID is assigned, which
1849                  * only happens when the first queue is created.
1850                  */
1851                 if (pdd->qpd.vmid)
1852                         amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
1853                                                         pdd->qpd.vmid);
1854         } else {
1855                 amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
1856                                         pdd->process->pasid, TLB_FLUSH_LEGACY);
1857         }
1858 }
1859
1860 #if defined(CONFIG_DEBUG_FS)
1861
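/* debugfs helper: dump the MQDs of every process. Walks the process table
 * under SRCU and calls pqm_debugfs_mqds() for each process while holding
 * that process's mutex.
 */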
1862 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
1863 {
1864         struct kfd_process *p;
1865         unsigned int temp;
1866         int r = 0;
1867
1868         int idx = srcu_read_lock(&kfd_processes_srcu);
1869
1870         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1871                 seq_printf(m, "Process %d PASID 0x%x:\n",
1872                            p->lead_thread->tgid, p->pasid);
1873
1874                 mutex_lock(&p->mutex);
1875                 r = pqm_debugfs_mqds(m, &p->pqm);
1876                 mutex_unlock(&p->mutex);
1877
1878                 if (r)
1879                         break;
1880         }
1881
1882         srcu_read_unlock(&kfd_processes_srcu, idx);
1883
1884         return r;
1885 }
1886
1887 #endif
1888