/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

struct i915_mm_struct {
        struct mm_struct *mm;
        struct drm_i915_private *i915;
        struct i915_mmu_notifier *mn;
        struct hlist_node node;
        struct kref kref;
        struct work_struct work;
};

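/*
 * One i915_mm_struct is kept per client mm_struct: all userptr objects
 * created from the same process share it. It is reference counted and,
 * as the final unreference may be dropped in an awkward locking context,
 * the actual freeing (including mmu-notifier unregistration) is deferred
 * to a worker.
 */
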
#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

struct i915_mmu_notifier {
        spinlock_t lock;
        struct hlist_node node;
        struct mmu_notifier mn;
        struct rb_root_cached objects;
        struct i915_mm_struct *mm;
};

struct i915_mmu_object {
        struct i915_mmu_notifier *mn;
        struct drm_i915_gem_object *obj;
        struct interval_tree_node it;
};

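/*
 * Each userptr object is tracked by an i915_mmu_object in a per-mm
 * interval tree, keyed by the CPU address range it wraps, so that an
 * invalidation of any address range can cheaply find all overlapping
 * objects.
 */
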
static void add_object(struct i915_mmu_object *mo)
{
        GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
        interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
        if (RB_EMPTY_NODE(&mo->it.rb))
                return;

        interval_tree_remove(&mo->it, &mo->mn->objects);
        RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
        struct i915_mmu_object *mo = obj->userptr.mmu_object;

        /*
         * During mm_invalidate_range we need to cancel any userptr that
         * overlaps the range being invalidated. Doing so requires the
         * struct_mutex, and that risks recursion. In order to cause
         * recursion, the user must alias the userptr address space with
         * a GTT mmapping (possible with a MAP_FIXED) - then when we have
         * to invalidate that mmapping, mm_invalidate_range is called with
         * the userptr address *and* the struct_mutex held. To prevent that
         * we set a flag under the i915_mmu_notifier spinlock to indicate
         * whether this object is valid.
         */
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        if (value)
                add_object(mo);
        else
                del_object(mo);
        spin_unlock(&mo->mn->lock);
}

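/*
 * The invalidation callback below walks the interval tree for objects
 * overlapping the invalidated range, takes a reference on each (unless
 * the object is already being destroyed) and, with the spinlock dropped,
 * unbinds the object and releases its pinned pages. The walk restarts
 * from scratch afterwards since the tree may have changed in the
 * meantime.
 */
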
static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
                                  const struct mmu_notifier_range *range)
{
        struct i915_mmu_notifier *mn =
                container_of(_mn, struct i915_mmu_notifier, mn);
        struct interval_tree_node *it;
        unsigned long end;
        int ret = 0;

        if (RB_EMPTY_ROOT(&mn->objects.rb_root))
                return 0;

        /* interval ranges are inclusive, but invalidate range is exclusive */
        end = range->end - 1;

        spin_lock(&mn->lock);
        it = interval_tree_iter_first(&mn->objects, range->start, end);
        while (it) {
                struct drm_i915_gem_object *obj;

                if (!mmu_notifier_range_blockable(range)) {
                        ret = -EAGAIN;
                        break;
                }

                /*
                 * The mmu_object is released late when destroying the
                 * GEM object so it is entirely possible to gain a
                 * reference on an object in the process of being freed
                 * since our serialisation is via the spinlock and not
                 * the struct_mutex - and consequently use it after it
                 * is freed and then double free it. To prevent that
                 * use-after-free we only acquire a reference on the
                 * object if it is not in the process of being destroyed.
                 */
                obj = container_of(it, struct i915_mmu_object, it)->obj;
                if (!kref_get_unless_zero(&obj->base.refcount)) {
                        it = interval_tree_iter_next(it, range->start, end);
                        continue;
                }
                spin_unlock(&mn->lock);

                ret = i915_gem_object_unbind(obj,
                                             I915_GEM_OBJECT_UNBIND_ACTIVE);
                if (ret == 0)
                        ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
                i915_gem_object_put(obj);
                if (ret)
                        return ret;

                spin_lock(&mn->lock);

                /*
                 * As we do not (yet) protect the mmu from concurrent insertion
                 * over this range, there is no guarantee that this search will
                 * terminate given a pathological workload.
                 */
                it = interval_tree_iter_first(&mn->objects, range->start, end);
        }
        spin_unlock(&mn->lock);

        return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
        .invalidate_range_start = userptr_mn_invalidate_range_start,
};

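/*
 * Only invalidate_range_start is hooked: by the time it returns, the
 * overlapping objects have been unbound and their pinned pages released,
 * leaving nothing to do in the later stages of the invalidation.
 */
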
static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;

        mn = kmalloc(sizeof(*mn), GFP_KERNEL);
        if (mn == NULL)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&mn->lock);
        mn->mn.ops = &i915_gem_userptr_notifier;
        mn->objects = RB_ROOT_CACHED;
        mn->mm = mm;

        return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
        struct i915_mmu_object *mo;

        mo = fetch_and_zero(&obj->userptr.mmu_object);
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        del_object(mo);
        spin_unlock(&mo->mn->lock);
        kfree(mo);
}

static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;
        int err = 0;

        mn = mm->mn;
        if (mn)
                return mn;

        mn = i915_mmu_notifier_create(mm);
        if (IS_ERR(mn))
                err = PTR_ERR(mn);

        down_write(&mm->mm->mmap_sem);
        mutex_lock(&mm->i915->mm_lock);
        if (mm->mn == NULL && !err) {
                /* Protected by mmap_sem (write-lock) */
                err = __mmu_notifier_register(&mn->mn, mm->mm);
                if (err == 0) {
                        /* Protected by mm_lock */
                        mm->mn = fetch_and_zero(&mn);
                }
        } else if (mm->mn) {
                /*
                 * Someone else raced and successfully installed the mmu
                 * notifier, we can cancel our own errors.
                 */
                err = 0;
        }
        mutex_unlock(&mm->i915->mm_lock);
        up_write(&mm->mm->mmap_sem);

        if (mn && !IS_ERR(mn))
                kfree(mn);

        return err ? ERR_PTR(err) : mm->mn;
}

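/*
 * Note the create-then-publish dance above: the notifier is allocated
 * outside of any lock, registered under the mmap_sem write-lock, and if
 * another thread won the race (or registration failed) our private copy
 * is simply freed again.
 */
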
static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        struct i915_mmu_notifier *mn;
        struct i915_mmu_object *mo;

        if (flags & I915_USERPTR_UNSYNCHRONIZED)
                return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

        if (WARN_ON(obj->userptr.mm == NULL))
                return -EINVAL;

        mn = i915_mmu_notifier_find(obj->userptr.mm);
        if (IS_ERR(mn))
                return PTR_ERR(mn);

        mo = kzalloc(sizeof(*mo), GFP_KERNEL);
        if (!mo)
                return -ENOMEM;

        mo->mn = mn;
        mo->obj = obj;
        mo->it.start = obj->userptr.ptr;
        mo->it.last = obj->userptr.ptr + obj->base.size - 1;
        RB_CLEAR_NODE(&mo->it.rb);

        obj->userptr.mmu_object = mo;
        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
        if (mn == NULL)
                return;

        mmu_notifier_unregister(&mn->mn, mm);
        kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
                return -ENODEV;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
}

#endif

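/*
 * The stubs above cover the !CONFIG_MMU_NOTIFIER case: without the
 * notifier we cannot track changes to the vma behind the user pointer,
 * so userptr is only allowed for CAP_SYS_ADMIN users who explicitly opt
 * out of synchronisation with I915_USERPTR_UNSYNCHRONIZED.
 */
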
static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
{
        struct i915_mm_struct *mm;

        /* Protected by dev_priv->mm_lock */
        hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
                if (mm->mm == real)
                        return mm;

        return NULL;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_mm_struct *mm;
        int ret = 0;

        /* During release of the GEM object we hold the struct_mutex. This
         * precludes us from calling mmput() at that time as that may be
         * the last reference and so call exit_mmap(). exit_mmap() will
         * attempt to reap the vma, and if we were holding a GTT mmap
         * would then call drm_gem_vm_close() and attempt to reacquire
         * the struct_mutex. So in order to avoid that recursion, we have
         * to defer releasing the mm reference until after we drop the
         * struct_mutex, i.e. we need to schedule a worker to do the
         * clean up.
         */
        mutex_lock(&dev_priv->mm_lock);
        mm = __i915_mm_struct_find(dev_priv, current->mm);
        if (mm == NULL) {
                mm = kmalloc(sizeof(*mm), GFP_KERNEL);
                if (mm == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }

                kref_init(&mm->kref);
                mm->i915 = to_i915(obj->base.dev);

                mm->mm = current->mm;
                mmgrab(current->mm);

                mm->mn = NULL;

                /* Protected by dev_priv->mm_lock */
                hash_add(dev_priv->mm_structs,
                         &mm->node, (unsigned long)mm->mm);
        } else
                kref_get(&mm->kref);

        obj->userptr.mm = mm;
out:
        mutex_unlock(&dev_priv->mm_lock);
        return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
        struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);

        i915_mmu_notifier_free(mm->mn, mm->mm);
        mmdrop(mm->mm);
        kfree(mm);
}

static void
__i915_mm_struct_free(struct kref *kref)
{
        struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

        /* Protected by dev_priv->mm_lock */
        hash_del(&mm->node);
        mutex_unlock(&mm->i915->mm_lock);

        INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
        queue_work(mm->i915->mm.userptr_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mm == NULL)
                return;

        kref_put_mutex(&obj->userptr.mm->kref,
                       __i915_mm_struct_free,
                       &to_i915(obj->base.dev)->mm_lock);
        obj->userptr.mm = NULL;
}

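/*
 * kref_put_mutex() takes dev_priv->mm_lock before invoking
 * __i915_mm_struct_free(), so the hash removal there is serialised
 * against concurrent __i915_mm_struct_find() lookups; the release
 * function is then responsible for dropping the lock itself.
 */
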
struct get_pages_work {
        struct work_struct work;
        struct drm_i915_gem_object *obj;
        struct task_struct *task;
};

static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
                               struct page **pvec, int num_pages)
{
        unsigned int max_segment = i915_sg_segment_size();
        struct sg_table *st;
        unsigned int sg_page_sizes;
        int ret;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return ERR_PTR(-ENOMEM);

alloc_table:
        ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
                                          0, num_pages << PAGE_SHIFT,
                                          max_segment,
                                          GFP_KERNEL);
        if (ret) {
                kfree(st);
                return ERR_PTR(ret);
        }

        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                sg_free_table(st);

                if (max_segment > PAGE_SIZE) {
                        max_segment = PAGE_SIZE;
                        goto alloc_table;
                }

                kfree(st);
                return ERR_PTR(ret);
        }

        sg_page_sizes = i915_sg_page_sizes(st->sgl);

        __i915_gem_object_set_pages(obj, st, sg_page_sizes);

        return st;
}

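/*
 * Note the retry above: if DMA-mapping the coalesced sg_table fails, the
 * table is rebuilt with single-page segments (max_segment == PAGE_SIZE)
 * before giving up entirely.
 */
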
static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
        struct get_pages_work *work = container_of(_work, typeof(*work), work);
        struct drm_i915_gem_object *obj = work->obj;
        const int npages = obj->base.size >> PAGE_SHIFT;
        struct page **pvec;
        int pinned, ret;

        ret = -ENOMEM;
        pinned = 0;

        pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (pvec != NULL) {
                struct mm_struct *mm = obj->userptr.mm->mm;
                unsigned int flags = 0;

                if (!i915_gem_object_is_readonly(obj))
                        flags |= FOLL_WRITE;

                ret = -EFAULT;
                if (mmget_not_zero(mm)) {
                        down_read(&mm->mmap_sem);
                        while (pinned < npages) {
                                ret = get_user_pages_remote
                                        (work->task, mm,
                                         obj->userptr.ptr + pinned * PAGE_SIZE,
                                         npages - pinned,
                                         flags,
                                         pvec + pinned, NULL, NULL);
                                if (ret < 0)
                                        break;

                                pinned += ret;
                        }
                        up_read(&mm->mmap_sem);
                        mmput(mm);
                }
        }

        mutex_lock(&obj->mm.lock);
        if (obj->userptr.work == &work->work) {
                struct sg_table *pages = ERR_PTR(ret);

                if (pinned == npages) {
                        pages = __i915_gem_userptr_alloc_pages(obj, pvec,
                                                               npages);
                        if (!IS_ERR(pages)) {
                                pinned = 0;
                                pages = NULL;
                        }
                }

                obj->userptr.work = ERR_CAST(pages);
                if (IS_ERR(pages))
                        __i915_gem_userptr_set_active(obj, false);
        }
        mutex_unlock(&obj->mm.lock);

        release_pages(pvec, pinned);
        kvfree(pvec);

        i915_gem_object_put(obj);
        put_task_struct(work->task);
        kfree(work);
}

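/*
 * obj->userptr.work doubles as a cancellation token: the worker only
 * publishes its result if the object still points at this work item,
 * otherwise the pinned pages are quietly released again.
 */
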
static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
        struct get_pages_work *work;

        /* Spawn a worker so that we can acquire the
         * user pages without holding our mutex. Access
         * to the user pages requires mmap_sem, and we have
         * a strict lock ordering of mmap_sem, struct_mutex -
         * we already hold struct_mutex here and so cannot
         * call gup without encountering a lock inversion.
         *
         * Userspace will keep on repeating the operation
         * (thanks to EAGAIN) until either we hit the fast
         * path or the worker completes. If the worker is
         * cancelled or superseded, the task is still run
         * but the results ignored. (This leads to
         * complications that we may have a stray object
         * refcount that we need to be wary of when
         * checking for existing objects during creation.)
         * If the worker encounters an error, it reports
         * that error back to this function through
         * obj->userptr.work = ERR_PTR.
         */
        work = kmalloc(sizeof(*work), GFP_KERNEL);
        if (work == NULL)
                return ERR_PTR(-ENOMEM);

        obj->userptr.work = &work->work;

        work->obj = i915_gem_object_get(obj);

        work->task = current;
        get_task_struct(work->task);

        INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
        queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

        return ERR_PTR(-EAGAIN);
}

static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
        const int num_pages = obj->base.size >> PAGE_SHIFT;
        struct mm_struct *mm = obj->userptr.mm->mm;
        struct page **pvec;
        struct sg_table *pages;
        bool active;
        int pinned;

        /* If userspace should engineer that these pages are replaced in
         * the vma between us binding this page into the GTT and completion
         * of rendering... Their loss. If they change the mapping of their
         * pages they need to create a new bo to point to the new vma.
         *
         * However, that still leaves open the possibility of the vma
         * being copied upon fork. Which falls under the same userspace
         * synchronisation issue as a regular bo, except that this time
         * the process may not be expecting that a particular piece of
         * memory is tied to the GPU.
         *
         * Fortunately, we can hook into the mmu_notifier in order to
         * discard the page references prior to anything nasty happening
         * to the vma (discard or cloning) which should prevent the more
         * egregious cases from causing harm.
         */

        if (obj->userptr.work) {
                /* active flag should still be held for the pending work */
                if (IS_ERR(obj->userptr.work))
                        return PTR_ERR(obj->userptr.work);
                else
                        return -EAGAIN;
        }

        pvec = NULL;
        pinned = 0;

        if (mm == current->mm) {
                pvec = kvmalloc_array(num_pages, sizeof(struct page *),
                                      GFP_KERNEL |
                                      __GFP_NORETRY |
                                      __GFP_NOWARN);
                if (pvec) /* defer to worker if malloc fails */
                        pinned = __get_user_pages_fast(obj->userptr.ptr,
                                                       num_pages,
                                                       !i915_gem_object_is_readonly(obj),
                                                       pvec);
        }

        active = false;
        if (pinned < 0) {
                pages = ERR_PTR(pinned);
                pinned = 0;
        } else if (pinned < num_pages) {
                pages = __i915_gem_userptr_get_pages_schedule(obj);
                active = pages == ERR_PTR(-EAGAIN);
        } else {
                pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
                active = !IS_ERR(pages);
        }
        if (active)
                __i915_gem_userptr_set_active(obj, true);

        if (IS_ERR(pages))
                release_pages(pvec, pinned);
        kvfree(pvec);

        return PTR_ERR_OR_ZERO(pages);
}

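/*
 * Three outcomes above: a pinning error is propagated, a partial pin
 * defers to the worker (returning -EAGAIN so the caller retries), and a
 * complete pin builds the sg_table immediately on the fast path.
 */
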
static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
                           struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct page *page;

        /* Cancel any inflight work and force them to restart their gup */
        obj->userptr.work = NULL;
        __i915_gem_userptr_set_active(obj, false);
        if (!pages)
                return;

        __i915_gem_object_release_shmem(obj, pages, true);
        i915_gem_gtt_finish_pages(obj, pages);

        /*
         * We always mark objects as dirty when they are used by the GPU,
         * just in case. However, if we set the vma as being read-only we know
         * that the object will never have been written to.
         */
        if (i915_gem_object_is_readonly(obj))
                obj->mm.dirty = false;

        for_each_sgt_page(page, sgt_iter, pages) {
                if (obj->mm.dirty && trylock_page(page)) {
                        /*
                         * As this may not be anonymous memory (e.g. shmem)
                         * but exist on a real mapping, we have to lock
                         * the page in order to dirty it -- holding
                         * the page reference is not sufficient to
                         * prevent the inode from being truncated.
                         * Play safe and take the lock.
                         *
                         * However...!
                         *
                         * The mmu-notifier can be invalidated for a
                         * migrate_page, that is already holding the lock
                         * on the page. Such a try_to_unmap() will result
                         * in us calling put_pages() and so recursively try
                         * to lock the page. We avoid that deadlock with
                         * a trylock_page() and in exchange we risk missing
                         * some page dirtying.
                         */
                        set_page_dirty(page);
                        unlock_page(page);
                }

                mark_page_accessed(page);
                put_page(page);
        }
        obj->mm.dirty = false;

        sg_free_table(pages);
        kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
        i915_gem_userptr_release__mmu_notifier(obj);
        i915_gem_userptr_release__mm_struct(obj);
}

static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mmu_object)
                return 0;

        return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE |
                 I915_GEM_OBJECT_NO_GGTT |
                 I915_GEM_OBJECT_ASYNC_CANCEL,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
        .dmabuf_export = i915_gem_userptr_dmabuf_export,
        .release = i915_gem_userptr_release,
};

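/*
 * The flags mark userptr objects as backed by struct pages, eligible for
 * the shrinker, never allowed into the global GTT, and subject to
 * asynchronous cancellation of outstanding work when their pages are
 * revoked.
 */
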
/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */

int
i915_gem_userptr_ioctl(struct drm_device *dev,
                       void *data,
                       struct drm_file *file)
{
        static struct lock_class_key lock_class;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_userptr *args = data;
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
                /* We cannot support coherent userptr objects on hw without
                 * LLC and broken snooping.
                 */
                return -ENODEV;
        }

        if (args->flags & ~(I915_USERPTR_READ_ONLY |
                            I915_USERPTR_UNSYNCHRONIZED))
                return -EINVAL;

        if (!args->user_size)
                return -EINVAL;

        if (offset_in_page(args->user_ptr | args->user_size))
                return -EINVAL;

        if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
                return -EFAULT;

        if (args->flags & I915_USERPTR_READ_ONLY) {
                struct i915_address_space *vm;

                /*
                 * On almost all of the older hw, we cannot tell the GPU that
                 * a page is readonly.
                 */
                vm = rcu_dereference_protected(dev_priv->kernel_context->vm,
                                               true); /* static vm */
                if (!vm || !vm->has_read_only)
                        return -ENODEV;
        }

        obj = i915_gem_object_alloc();
        if (obj == NULL)
                return -ENOMEM;

        drm_gem_private_object_init(dev, &obj->base, args->user_size);
        i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
        obj->read_domains = I915_GEM_DOMAIN_CPU;
        obj->write_domain = I915_GEM_DOMAIN_CPU;
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        obj->userptr.ptr = args->user_ptr;
        if (args->flags & I915_USERPTR_READ_ONLY)
                i915_gem_object_set_readonly(obj);

        /* And keep a pointer to the current->mm for resolving the user pages
         * at binding. This means that we need to hook into the mmu_notifier
         * in order to detect if the mmu is destroyed.
         */
        ret = i915_gem_userptr_init__mm_struct(obj);
        if (ret == 0)
                ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
        if (ret == 0)
                ret = drm_gem_handle_create(file, &obj->base, &handle);

        /* drop reference from allocate - handle holds it now */
        i915_gem_object_put(obj);
        if (ret)
                return ret;

        args->handle = handle;
        return 0;
}

int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
        mutex_init(&dev_priv->mm_lock);
        hash_init(dev_priv->mm_structs);

        dev_priv->mm.userptr_wq =
                alloc_workqueue("i915-userptr-acquire",
                                WQ_HIGHPRI | WQ_UNBOUND,
                                0);
        if (!dev_priv->mm.userptr_wq)
                return -ENOMEM;

        return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
        destroy_workqueue(dev_priv->mm.userptr_wq);
}