drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  *
  21  * The above copyright notice and this permission notice (including the
  22  * next paragraph) shall be included in all copies or substantial portions
  23  * of the Software.
  24  *
  25  */
  26 /*
  27  * Authors:
  28  *    Christian König <christian.koenig@amd.com>
  29  */
  30
/**
 * DOC: MMU Notifier
 *
 * For coherent userptr handling the driver registers an MMU notifier, which
 * informs it about updates to the page tables of a process.
 *
 * When somebody tries to invalidate the page tables we block the update until
 * all operations on the pages in question are completed, then those pages are
 * marked as accessed and also dirty if it wasn't a read only access.
 *
 * New command submissions using the userptrs in question are delayed until
 * all page table invalidations are completed and we once more see a coherent
 * process address space.
 */
  45
  46 #include <linux/firmware.h>
  47 #include <linux/module.h>
  48 #include <linux/mmu_notifier.h>
  49 #include <linux/interval_tree.h>
  50 #include <drm/drmP.h>
  51 #include <drm/drm.h>
  52
  53 #include "amdgpu.h"
  54 #include "amdgpu_amdkfd.h"
  55
  56 /**
  57  * struct amdgpu_mn
  58  *
  59  * @adev: amdgpu device pointer
  60  * @mm: process address space
  61  * @mn: MMU notifier structure
  62  * @type: type of MMU notifier
  63  * @work: destruction work item
  64  * @node: hash table node to find structure by adev and mn
  65  * @lock: rw semaphore protecting the notifier nodes
  66  * @objects: interval tree containing amdgpu_mn_nodes
  67  * @read_lock: mutex for recursive locking of @lock
  68  * @recursion: depth of recursion
  69  *
  70  * Data for each amdgpu device and process address space.
  71  */
  72 struct amdgpu_mn {
  73         /* constant after initialisation */
  74         struct amdgpu_device    *adev;
  75         struct mm_struct        *mm;
  76         struct mmu_notifier     mn;
  77         enum amdgpu_mn_type     type;
  78
  79         /* only used on destruction */
  80         struct work_struct      work;
  81
  82         /* protected by adev->mn_lock */
  83         struct hlist_node       node;
  84
  85         /* objects protected by lock */
  86         struct rw_semaphore     lock;
  87         struct rb_root_cached   objects;
  88         struct mutex            read_lock;
  89         atomic_t                recursion;
  90 };
  91
  92 /**
  93  * struct amdgpu_mn_node
  94  *
  95  * @it: interval node defining start-last of the affected address range
  96  * @bos: list of all BOs in the affected address range
  97  *
  98  * Manages all BOs which are affected of a certain range of address space.
  99  */
 100 struct amdgpu_mn_node {
 101         struct interval_tree_node       it;
 102         struct list_head                bos;
 103 };
 104
 105 /**
 106  * amdgpu_mn_destroy - destroy the MMU notifier
 107  *
 108  * @work: previously sheduled work item
 109  *
 110  * Lazy destroys the notifier from a work item
 111  */
 112 static void amdgpu_mn_destroy(struct work_struct *work)
 113 {
 114         struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
 115         struct amdgpu_device *adev = amn->adev;
 116         struct amdgpu_mn_node *node, *next_node;
 117         struct amdgpu_bo *bo, *next_bo;
 118
 119         mutex_lock(&adev->mn_lock);
 120         down_write(&amn->lock);
 121         hash_del(&amn->node);
 122         rbtree_postorder_for_each_entry_safe(node, next_node,
 123                                              &amn->objects.rb_root, it.rb) {
 124                 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
 125                         bo->mn = NULL;
 126                         list_del_init(&bo->mn_list);
 127                 }
 128                 kfree(node);
 129         }
 130         up_write(&amn->lock);
 131         mutex_unlock(&adev->mn_lock);
 132         mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
 133         kfree(amn);
 134 }
 135
 136 /**
 137  * amdgpu_mn_release - callback to notify about mm destruction
 138  *
 139  * @mn: our notifier
 140  * @mm: the mm this callback is about
 141  *
 142  * Shedule a work item to lazy destroy our notifier.
 143  */
 144 static void amdgpu_mn_release(struct mmu_notifier *mn,
 145                               struct mm_struct *mm)
 146 {
 147         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 148
 149         INIT_WORK(&amn->work, amdgpu_mn_destroy);
 150         schedule_work(&amn->work);
 151 }
 152
 153
 154 /**
 155  * amdgpu_mn_lock - take the write side lock for this notifier
 156  *
 157  * @mn: our notifier
 158  */
 159 void amdgpu_mn_lock(struct amdgpu_mn *mn)
 160 {
 161         if (mn)
 162                 down_write(&mn->lock);
 163 }
 164
 165 /**
 166  * amdgpu_mn_unlock - drop the write side lock for this notifier
 167  *
 168  * @mn: our notifier
 169  */
 170 void amdgpu_mn_unlock(struct amdgpu_mn *mn)
 171 {
 172         if (mn)
 173                 up_write(&mn->lock);
 174 }
 175
 176 /**
 177  * amdgpu_mn_read_lock - take the read side lock for this notifier
 178  *
 179  * @amn: our notifier
 180  */
 181 static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 182 {
 183         if (blockable)
 184                 mutex_lock(&amn->read_lock);
 185         else if (!mutex_trylock(&amn->read_lock))
 186                 return -EAGAIN;
 187
 188         if (atomic_inc_return(&amn->recursion) == 1)
 189                 down_read_non_owner(&amn->lock);
 190         mutex_unlock(&amn->read_lock);
 191
 192         return 0;
 193 }
 194
 195 /**
 196  * amdgpu_mn_read_unlock - drop the read side lock for this notifier
 197  *
 198  * @amn: our notifier
 199  */
 200 static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
 201 {
 202         if (atomic_dec_return(&amn->recursion) == 0)
 203                 up_read_non_owner(&amn->lock);
 204 }
 205
 206 /**
 207  * amdgpu_mn_invalidate_node - unmap all BOs of a node
 208  *
 209  * @node: the node with the BOs to unmap
 210  * @start: start of address range affected
 211  * @end: end of address range affected
 212  *
 213  * Block for operations on BOs to finish and mark pages as accessed and
 214  * potentially dirty.
 215  */
 216 static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 217                                       unsigned long start,
 218                                       unsigned long end)
 219 {
 220         struct amdgpu_bo *bo;
 221         long r;
 222
 223         list_for_each_entry(bo, &node->bos, mn_list) {
 224
 225                 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
 226                         continue;
 227
 228                 r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
 229                         true, false, MAX_SCHEDULE_TIMEOUT);
 230                 if (r <= 0)
 231                         DRM_ERROR("(%ld) failed to wait for user bo\n", r);
 232
 233                 amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 234         }
 235 }
 236
 237 /**
 238  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
 239  *
 240  * @mn: our notifier
 241  * @range: mmu notifier context
 242  *
 243  * Block for operations on BOs to finish and mark pages as accessed and
 244  * potentially dirty.
 245  */
 246 static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 247                         const struct mmu_notifier_range *range)
 248 {
 249         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 250         struct interval_tree_node *it;
 251         unsigned long end;
 252
 253         /* notification is exclusive, but interval is inclusive */
 254         end = range->end - 1;
 255
 256         /* TODO we should be able to split locking for interval tree and
 257          * amdgpu_mn_invalidate_node
 258          */
 259         if (amdgpu_mn_read_lock(amn, range->blockable))
 260                 return -EAGAIN;
 261
 262         it = interval_tree_iter_first(&amn->objects, range->start, end);
 263         while (it) {
 264                 struct amdgpu_mn_node *node;
 265
 266                 if (!range->blockable) {
 267                         amdgpu_mn_read_unlock(amn);
 268                         return -EAGAIN;
 269                 }
 270
 271                 node = container_of(it, struct amdgpu_mn_node, it);
 272                 it = interval_tree_iter_next(it, range->start, end);
 273
 274                 amdgpu_mn_invalidate_node(node, range->start, end);
 275         }
 276
 277         return 0;
 278 }
 279
 280 /**
 281  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
 282  *
 283  * @mn: our notifier
 284  * @mm: the mm this callback is about
 285  * @start: start of updated range
 286  * @end: end of updated range
 287  *
 288  * We temporarily evict all BOs between start and end. This
 289  * necessitates evicting all user-mode queues of the process. The BOs
 290  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
 291  */
 292 static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 293                         const struct mmu_notifier_range *range)
 294 {
 295         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 296         struct interval_tree_node *it;
 297         unsigned long end;
 298
 299         /* notification is exclusive, but interval is inclusive */
 300         end = range->end - 1;
 301
 302         if (amdgpu_mn_read_lock(amn, range->blockable))
 303                 return -EAGAIN;
 304
 305         it = interval_tree_iter_first(&amn->objects, range->start, end);
 306         while (it) {
 307                 struct amdgpu_mn_node *node;
 308                 struct amdgpu_bo *bo;
 309
 310                 if (!range->blockable) {
 311                         amdgpu_mn_read_unlock(amn);
 312                         return -EAGAIN;
 313                 }
 314
 315                 node = container_of(it, struct amdgpu_mn_node, it);
 316                 it = interval_tree_iter_next(it, range->start, end);
 317
 318                 list_for_each_entry(bo, &node->bos, mn_list) {
 319                         struct kgd_mem *mem = bo->kfd_bo;
 320
 321                         if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 322                                                          range->start,
 323                                                          end))
 324                                 amdgpu_amdkfd_evict_userptr(mem, range->mm);
 325                 }
 326         }
 327
 328         return 0;
 329 }
 330
 331 /**
 332  * amdgpu_mn_invalidate_range_end - callback to notify about mm change
 333  *
 334  * @mn: our notifier
 335  * @mm: the mm this callback is about
 336  * @start: start of updated range
 337  * @end: end of updated range
 338  *
 339  * Release the lock again to allow new command submissions.
 340  */
 341 static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
 342                         const struct mmu_notifier_range *range)
 343 {
 344         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 345
 346         amdgpu_mn_read_unlock(amn);
 347 }
 348
 349 static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
 350         [AMDGPU_MN_TYPE_GFX] = {
 351                 .release = amdgpu_mn_release,
 352                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
 353                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 354         },
 355         [AMDGPU_MN_TYPE_HSA] = {
 356                 .release = amdgpu_mn_release,
 357                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
 358                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 359         },
 360 };
 361
 362 /* Low bits of any reasonable mm pointer will be unused due to struct
 363  * alignment. Use these bits to make a unique key from the mm pointer
 364  * and notifier type.
 365  */
 366 #define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 367
 368 /**
 369  * amdgpu_mn_get - create notifier context
 370  *
 371  * @adev: amdgpu device pointer
 372  * @type: type of MMU notifier context
 373  *
 374  * Creates a notifier context for current->mm.
 375  */
 376 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 377                                 enum amdgpu_mn_type type)
 378 {
 379         struct mm_struct *mm = current->mm;
 380         struct amdgpu_mn *amn;
 381         unsigned long key = AMDGPU_MN_KEY(mm, type);
 382         int r;
 383
 384         mutex_lock(&adev->mn_lock);
 385         if (down_write_killable(&mm->mmap_sem)) {
 386                 mutex_unlock(&adev->mn_lock);
 387                 return ERR_PTR(-EINTR);
 388         }
 389
 390         hash_for_each_possible(adev->mn_hash, amn, node, key)
 391                 if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
 392                         goto release_locks;
 393
 394         amn = kzalloc(sizeof(*amn), GFP_KERNEL);
 395         if (!amn) {
 396                 amn = ERR_PTR(-ENOMEM);
 397                 goto release_locks;
 398         }
 399
 400         amn->adev = adev;
 401         amn->mm = mm;
 402         init_rwsem(&amn->lock);
 403         amn->type = type;
 404         amn->mn.ops = &amdgpu_mn_ops[type];
 405         amn->objects = RB_ROOT_CACHED;
 406         mutex_init(&amn->read_lock);
 407         atomic_set(&amn->recursion, 0);
 408
 409         r = __mmu_notifier_register(&amn->mn, mm);
 410         if (r)
 411                 goto free_amn;
 412
 413         hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
 414
 415 release_locks:
 416         up_write(&mm->mmap_sem);
 417         mutex_unlock(&adev->mn_lock);
 418
 419         return amn;
 420
 421 free_amn:
 422         up_write(&mm->mmap_sem);
 423         mutex_unlock(&adev->mn_lock);
 424         kfree(amn);
 425
 426         return ERR_PTR(r);
 427 }
 428
 429 /**
 430  * amdgpu_mn_register - register a BO for notifier updates
 431  *
 432  * @bo: amdgpu buffer object
 433  * @addr: userptr addr we should monitor
 434  *
 435  * Registers an MMU notifier for the given BO at the specified address.
 436  * Returns 0 on success, -ERRNO if anything goes wrong.
 437  */
 438 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 439 {
 440         unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 441         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 442         enum amdgpu_mn_type type =
 443                 bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 444         struct amdgpu_mn *amn;
 445         struct amdgpu_mn_node *node = NULL, *new_node;
 446         struct list_head bos;
 447         struct interval_tree_node *it;
 448
 449         amn = amdgpu_mn_get(adev, type);
 450         if (IS_ERR(amn))
 451                 return PTR_ERR(amn);
 452
 453         new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
 454         if (!new_node)
 455                 return -ENOMEM;
 456
 457         INIT_LIST_HEAD(&bos);
 458
 459         down_write(&amn->lock);
 460
 461         while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
 462                 kfree(node);
 463                 node = container_of(it, struct amdgpu_mn_node, it);
 464                 interval_tree_remove(&node->it, &amn->objects);
 465                 addr = min(it->start, addr);
 466                 end = max(it->last, end);
 467                 list_splice(&node->bos, &bos);
 468         }
 469
 470         if (!node)
 471                 node = new_node;
 472         else
 473                 kfree(new_node);
 474
 475         bo->mn = amn;
 476
 477         node->it.start = addr;
 478         node->it.last = end;
 479         INIT_LIST_HEAD(&node->bos);
 480         list_splice(&bos, &node->bos);
 481         list_add(&bo->mn_list, &node->bos);
 482
 483         interval_tree_insert(&node->it, &amn->objects);
 484
 485         up_write(&amn->lock);
 486
 487         return 0;
 488 }
 489
 490 /**
 491  * amdgpu_mn_unregister - unregister a BO for notifier updates
 492  *
 493  * @bo: amdgpu buffer object
 494  *
 495  * Remove any registration of MMU notifier updates from the buffer object.
 496  */
 497 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 498 {
 499         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 500         struct amdgpu_mn *amn;
 501         struct list_head *head;
 502
 503         mutex_lock(&adev->mn_lock);
 504
 505         amn = bo->mn;
 506         if (amn == NULL) {
 507                 mutex_unlock(&adev->mn_lock);
 508                 return;
 509         }
 510
 511         down_write(&amn->lock);
 512
 513         /* save the next list entry for later */
 514         head = bo->mn_list.next;
 515
 516         bo->mn = NULL;
 517         list_del_init(&bo->mn_list);
 518
 519         if (list_empty(head)) {
 520                 struct amdgpu_mn_node *node;
 521
 522                 node = container_of(head, struct amdgpu_mn_node, bos);
 523                 interval_tree_remove(&node->it, &amn->objects);
 524                 kfree(node);
 525         }
 526
 527         up_write(&amn->lock);
 528         mutex_unlock(&adev->mn_lock);
 529 }
 530