/*
 * Copyright © 2012-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
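
/*
 * One i915_mm_struct is kept per client mm_struct: it caches the mm, the
 * mmu_notifier registered against it (if any) and a kref shared by every
 * userptr object created from that mm. The final reference is dropped from
 * a worker (__i915_mm_struct_free__worker) so that the mm itself is never
 * released while struct_mutex is held.
 */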

struct i915_mm_struct {
        struct mm_struct *mm;
        struct drm_i915_private *i915;
        struct i915_mmu_notifier *mn;
        struct hlist_node node;
        struct kref kref;
        struct work_struct work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

struct i915_mmu_notifier {
        spinlock_t lock;
        struct hlist_node node;
        struct mmu_notifier mn;
        struct rb_root_cached objects;
        struct i915_mm_struct *mm;
};
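
/*
 * One i915_mmu_object per userptr object: an interval-tree node spanning the
 * object's user address range, inserted under i915_mmu_notifier.lock while
 * the object has pages so that invalidate_range_start() can find it.
 */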

struct i915_mmu_object {
        struct i915_mmu_notifier *mn;
        struct drm_i915_gem_object *obj;
        struct interval_tree_node it;
};

static void add_object(struct i915_mmu_object *mo)
{
        GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
        interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
        if (RB_EMPTY_NODE(&mo->it.rb))
                return;

        interval_tree_remove(&mo->it, &mo->mn->objects);
        RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
        struct i915_mmu_object *mo = obj->userptr.mmu_object;

        /*
         * During mm_invalidate_range we need to cancel any userptr that
         * overlaps the range being invalidated. Doing so requires the
         * struct_mutex, and that risks recursion. In order to cause
         * recursion, the user must alias the userptr address space with
         * a GTT mmapping (possible with a MAP_FIXED) - then when we have
         * to invalidate that mmapping, mm_invalidate_range is called with
         * the userptr address *and* the struct_mutex held. To prevent that
         * we set a flag under the i915_mmu_notifier spinlock to indicate
         * whether this object is valid.
         */
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        if (value)
                add_object(mo);
        else
                del_object(mo);
        spin_unlock(&mo->mn->lock);
}
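
/*
 * userptr_mn_invalidate_range_start() is the mmu_notifier callback: every
 * userptr object whose interval overlaps the invalidated range is unbound
 * from the GTT and has its pages released, so the GPU stops using pages
 * that the CPU mapping is about to change or drop.
 */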

static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
                                  const struct mmu_notifier_range *range)
{
        struct i915_mmu_notifier *mn =
                container_of(_mn, struct i915_mmu_notifier, mn);
        struct interval_tree_node *it;
        struct mutex *unlock = NULL;
        unsigned long end;
        int ret = 0;

        if (RB_EMPTY_ROOT(&mn->objects.rb_root))
                return 0;

        /* interval ranges are inclusive, but invalidate range is exclusive */
        end = range->end - 1;

        spin_lock(&mn->lock);
        it = interval_tree_iter_first(&mn->objects, range->start, end);
        while (it) {
                struct drm_i915_gem_object *obj;

                if (!range->blockable) {
                        ret = -EAGAIN;
                        break;
                }

                /*
                 * The mmu_object is released late when destroying the
                 * GEM object so it is entirely possible to gain a
                 * reference on an object in the process of being freed
                 * since our serialisation is via the spinlock and not
                 * the struct_mutex - and consequently use it after it
                 * is freed and then double free it. To prevent that
                 * use-after-free we only acquire a reference on the
                 * object if it is not in the process of being destroyed.
                 */
                obj = container_of(it, struct i915_mmu_object, it)->obj;
                if (!kref_get_unless_zero(&obj->base.refcount)) {
                        it = interval_tree_iter_next(it, range->start, end);
                        continue;
                }
                spin_unlock(&mn->lock);

                if (!unlock) {
                        unlock = &mn->mm->i915->drm.struct_mutex;

                        switch (mutex_trylock_recursive(unlock)) {
                        default:
                        case MUTEX_TRYLOCK_FAILED:
                                if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
                                        i915_gem_object_put(obj);
                                        return -EINTR;
                                }
                                /* fall through */
                        case MUTEX_TRYLOCK_SUCCESS:
                                break;

                        case MUTEX_TRYLOCK_RECURSIVE:
                                unlock = ERR_PTR(-EEXIST);
                                break;
                        }
                }

                ret = i915_gem_object_unbind(obj);
                if (ret == 0)
                        ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
                i915_gem_object_put(obj);
                if (ret)
                        goto unlock;

                spin_lock(&mn->lock);

                /*
                 * As we do not (yet) protect the mmu from concurrent insertion
                 * over this range, there is no guarantee that this search will
                 * terminate given a pathologic workload.
                 */
                it = interval_tree_iter_first(&mn->objects, range->start, end);
        }
        spin_unlock(&mn->lock);

unlock:
        if (!IS_ERR_OR_NULL(unlock))
                mutex_unlock(unlock);

        return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
        .invalidate_range_start = userptr_mn_invalidate_range_start,
};

static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;

        mn = kmalloc(sizeof(*mn), GFP_KERNEL);
        if (mn == NULL)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&mn->lock);
        mn->mn.ops = &i915_gem_userptr_notifier;
        mn->objects = RB_ROOT_CACHED;
        mn->mm = mm;

        return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
        struct i915_mmu_object *mo;

        mo = fetch_and_zero(&obj->userptr.mmu_object);
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        del_object(mo);
        spin_unlock(&mo->mn->lock);
        kfree(mo);
}
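
/*
 * i915_mmu_notifier_find() returns the notifier for this mm, lazily creating
 * and registering one if needed. Registration is serialised by taking
 * mmap_sem for write plus the driver's mm_lock; if another thread wins the
 * race, the locally created notifier is simply freed again.
 */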

static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;
        int err = 0;

        mn = mm->mn;
        if (mn)
                return mn;

        mn = i915_mmu_notifier_create(mm);
        if (IS_ERR(mn))
                err = PTR_ERR(mn);

        down_write(&mm->mm->mmap_sem);
        mutex_lock(&mm->i915->mm_lock);
        if (mm->mn == NULL && !err) {
                /* Protected by mmap_sem (write-lock) */
                err = __mmu_notifier_register(&mn->mn, mm->mm);
                if (!err) {
                        /* Protected by mm_lock */
                        mm->mn = fetch_and_zero(&mn);
                }
        } else if (mm->mn) {
                /*
                 * Someone else raced and successfully installed the mmu
                 * notifier, we can cancel our own errors.
                 */
                err = 0;
        }
        mutex_unlock(&mm->i915->mm_lock);
        up_write(&mm->mm->mmap_sem);

        if (mn && !IS_ERR(mn))
                kfree(mn);

        return err ? ERR_PTR(err) : mm->mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        struct i915_mmu_notifier *mn;
        struct i915_mmu_object *mo;

        if (flags & I915_USERPTR_UNSYNCHRONIZED)
                return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

        if (WARN_ON(obj->userptr.mm == NULL))
                return -EINVAL;

        mn = i915_mmu_notifier_find(obj->userptr.mm);
        if (IS_ERR(mn))
                return PTR_ERR(mn);

        mo = kzalloc(sizeof(*mo), GFP_KERNEL);
        if (!mo)
                return -ENOMEM;

        mo->mn = mn;
        mo->obj = obj;
        mo->it.start = obj->userptr.ptr;
        mo->it.last = obj->userptr.ptr + obj->base.size - 1;
        RB_CLEAR_NODE(&mo->it.rb);

        obj->userptr.mmu_object = mo;
        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
        if (mn == NULL)
                return;

        mmu_notifier_unregister(&mn->mn, mm);
        kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
                return -ENODEV;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
}

#endif
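
/*
 * i915_mm_struct instances live in dev_priv->mm_structs, a hashtable keyed
 * by the mm_struct pointer and protected by dev_priv->mm_lock, so all
 * userptr objects created by the same process share a single entry.
 */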

static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
{
        struct i915_mm_struct *mm;

        /* Protected by dev_priv->mm_lock */
        hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
                if (mm->mm == real)
                        return mm;

        return NULL;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_mm_struct *mm;
        int ret = 0;

        /* During release of the GEM object we hold the struct_mutex. This
         * precludes us from calling mmput() at that time as that may be
         * the last reference and so call exit_mmap(). exit_mmap() will
         * attempt to reap the vma, and if we were holding a GTT mmap
         * would then call drm_gem_vm_close() and attempt to reacquire
         * the struct mutex. So in order to avoid that recursion, we have
         * to defer releasing the mm reference until after we drop the
         * struct_mutex, i.e. we need to schedule a worker to do the clean
         * up.
         */
        mutex_lock(&dev_priv->mm_lock);
        mm = __i915_mm_struct_find(dev_priv, current->mm);
        if (mm == NULL) {
                mm = kmalloc(sizeof(*mm), GFP_KERNEL);
                if (mm == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }

                kref_init(&mm->kref);
                mm->i915 = to_i915(obj->base.dev);

                mm->mm = current->mm;
                mmgrab(current->mm);

                mm->mn = NULL;

                /* Protected by dev_priv->mm_lock */
                hash_add(dev_priv->mm_structs,
                         &mm->node, (unsigned long)mm->mm);
        } else
                kref_get(&mm->kref);

        obj->userptr.mm = mm;
out:
        mutex_unlock(&dev_priv->mm_lock);
        return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
        struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);

        i915_mmu_notifier_free(mm->mn, mm->mm);
        mmdrop(mm->mm);
        kfree(mm);
}

static void
__i915_mm_struct_free(struct kref *kref)
{
        struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

        /* Protected by dev_priv->mm_lock */
        hash_del(&mm->node);
        mutex_unlock(&mm->i915->mm_lock);

        INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
        queue_work(mm->i915->mm.userptr_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mm == NULL)
                return;

        kref_put_mutex(&obj->userptr.mm->kref,
                       __i915_mm_struct_free,
                       &to_i915(obj->base.dev)->mm_lock);
        obj->userptr.mm = NULL;
}
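
/*
 * get_pages_work is queued on the userptr workqueue when the fast
 * __get_user_pages_fast() path cannot pin every page: the worker re-runs
 * get_user_pages_remote() under the target mm's mmap_sem, outside
 * struct_mutex, and reports back through obj->userptr.work.
 */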

struct get_pages_work {
        struct work_struct work;
        struct drm_i915_gem_object *obj;
        struct task_struct *task;
};
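
/*
 * __i915_gem_userptr_alloc_pages() builds an sg_table from the pinned pages
 * and maps it for DMA; if mapping fails with large coalesced segments, the
 * table is rebuilt with single-page (PAGE_SIZE) segments before giving up.
 */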

static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
                               struct page **pvec, int num_pages)
{
        unsigned int max_segment = i915_sg_segment_size();
        struct sg_table *st;
        unsigned int sg_page_sizes;
        int ret;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return ERR_PTR(-ENOMEM);

alloc_table:
        ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
                                          0, num_pages << PAGE_SHIFT,
                                          max_segment,
                                          GFP_KERNEL);
        if (ret) {
                kfree(st);
                return ERR_PTR(ret);
        }

        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                sg_free_table(st);

                if (max_segment > PAGE_SIZE) {
                        max_segment = PAGE_SIZE;
                        goto alloc_table;
                }

                kfree(st);
                return ERR_PTR(ret);
        }

        sg_page_sizes = i915_sg_page_sizes(st->sgl);

        __i915_gem_object_set_pages(obj, st, sg_page_sizes);

        return st;
}

static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
        struct get_pages_work *work = container_of(_work, typeof(*work), work);
        struct drm_i915_gem_object *obj = work->obj;
        const int npages = obj->base.size >> PAGE_SHIFT;
        struct page **pvec;
        int pinned, ret;

        ret = -ENOMEM;
        pinned = 0;

        pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (pvec != NULL) {
                struct mm_struct *mm = obj->userptr.mm->mm;
                unsigned int flags = 0;

                if (!i915_gem_object_is_readonly(obj))
                        flags |= FOLL_WRITE;

                ret = -EFAULT;
                if (mmget_not_zero(mm)) {
                        down_read(&mm->mmap_sem);
                        while (pinned < npages) {
                                ret = get_user_pages_remote
                                        (work->task, mm,
                                         obj->userptr.ptr + pinned * PAGE_SIZE,
                                         npages - pinned,
                                         flags,
                                         pvec + pinned, NULL, NULL);
                                if (ret < 0)
                                        break;

                                pinned += ret;
                        }
                        up_read(&mm->mmap_sem);
                        mmput(mm);
                }
        }

        mutex_lock(&obj->mm.lock);
        if (obj->userptr.work == &work->work) {
                struct sg_table *pages = ERR_PTR(ret);

                if (pinned == npages) {
                        pages = __i915_gem_userptr_alloc_pages(obj, pvec,
                                                               npages);
                        if (!IS_ERR(pages)) {
                                pinned = 0;
                                pages = NULL;
                        }
                }

                obj->userptr.work = ERR_CAST(pages);
                if (IS_ERR(pages))
                        __i915_gem_userptr_set_active(obj, false);
        }
        mutex_unlock(&obj->mm.lock);

        release_pages(pvec, pinned);
        kvfree(pvec);

        i915_gem_object_put(obj);
        put_task_struct(work->task);
        kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
        struct get_pages_work *work;

        /* Spawn a worker so that we can acquire the
         * user pages without holding our mutex. Access
         * to the user pages requires mmap_sem, and we have
         * a strict lock ordering of mmap_sem, struct_mutex -
         * we already hold struct_mutex here and so cannot
         * call gup without encountering a lock inversion.
         *
         * Userspace will keep on repeating the operation
         * (thanks to EAGAIN) until either we hit the fast
         * path or the worker completes. If the worker is
         * cancelled or superseded, the task is still run
         * but the results ignored. (This leads to
         * complications that we may have a stray object
         * refcount that we need to be wary of when
         * checking for existing objects during creation.)
         * If the worker encounters an error, it reports
         * that error back to this function through
         * obj->userptr.work = ERR_PTR.
         */
        work = kmalloc(sizeof(*work), GFP_KERNEL);
        if (work == NULL)
                return ERR_PTR(-ENOMEM);

        obj->userptr.work = &work->work;

        work->obj = i915_gem_object_get(obj);

        work->task = current;
        get_task_struct(work->task);

        INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
        queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

        return ERR_PTR(-EAGAIN);
}
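
/*
 * get_pages first tries to pin everything with __get_user_pages_fast() when
 * called from the owning process; otherwise, or on a partial pin, it defers
 * to the worker above and returns -EAGAIN so that the caller retries.
 */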

static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
        const int num_pages = obj->base.size >> PAGE_SHIFT;
        struct mm_struct *mm = obj->userptr.mm->mm;
        struct page **pvec;
        struct sg_table *pages;
        bool active;
        int pinned;

        /* If userspace should engineer that these pages are replaced in
         * the vma between us binding this page into the GTT and completion
         * of rendering... Their loss. If they change the mapping of their
         * pages they need to create a new bo to point to the new vma.
         *
         * However, that still leaves open the possibility of the vma
         * being copied upon fork. Which falls under the same userspace
         * synchronisation issue as a regular bo, except that this time
         * the process may not be expecting that a particular piece of
         * memory is tied to the GPU.
         *
         * Fortunately, we can hook into the mmu_notifier in order to
         * discard the page references prior to anything nasty happening
         * to the vma (discard or cloning) which should prevent the more
         * egregious cases from causing harm.
         */

        if (obj->userptr.work) {
                /* active flag should still be held for the pending work */
                if (IS_ERR(obj->userptr.work))
                        return PTR_ERR(obj->userptr.work);
                else
                        return -EAGAIN;
        }

        pvec = NULL;
        pinned = 0;

        if (mm == current->mm) {
                pvec = kvmalloc_array(num_pages, sizeof(struct page *),
                                      GFP_KERNEL |
                                      __GFP_NORETRY |
                                      __GFP_NOWARN);
                if (pvec) /* defer to worker if malloc fails */
                        pinned = __get_user_pages_fast(obj->userptr.ptr,
                                                       num_pages,
                                                       !i915_gem_object_is_readonly(obj),
                                                       pvec);
        }

        active = false;
        if (pinned < 0) {
                pages = ERR_PTR(pinned);
                pinned = 0;
        } else if (pinned < num_pages) {
                pages = __i915_gem_userptr_get_pages_schedule(obj);
                active = pages == ERR_PTR(-EAGAIN);
        } else {
                pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
                active = !IS_ERR(pages);
        }
        if (active)
                __i915_gem_userptr_set_active(obj, true);

        if (IS_ERR(pages))
                release_pages(pvec, pinned);
        kvfree(pvec);

        return PTR_ERR_OR_ZERO(pages);
}

static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
                           struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct page *page;

        /* Cancel any inflight work and force them to restart their gup */
        obj->userptr.work = NULL;
        __i915_gem_userptr_set_active(obj, false);
        if (!pages)
                return;

        if (obj->mm.madv != I915_MADV_WILLNEED)
                obj->mm.dirty = false;

        i915_gem_gtt_finish_pages(obj, pages);

        for_each_sgt_page(page, sgt_iter, pages) {
                if (obj->mm.dirty)
                        set_page_dirty(page);

                mark_page_accessed(page);
                put_page(page);
        }
        obj->mm.dirty = false;

        sg_free_table(pages);
        kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
        i915_gem_userptr_release__mmu_notifier(obj);
        i915_gem_userptr_release__mm_struct(obj);
}

static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mmu_object)
                return 0;

        return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE |
                 I915_GEM_OBJECT_ASYNC_CANCEL,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
        .dmabuf_export = i915_gem_userptr_dmabuf_export,
        .release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note, that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
                       void *data,
                       struct drm_file *file)
{
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_userptr *args = data;
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
                /* We cannot support coherent userptr objects on hw without
                 * LLC and broken snooping.
                 */
                return -ENODEV;
        }

        if (args->flags & ~(I915_USERPTR_READ_ONLY |
                            I915_USERPTR_UNSYNCHRONIZED))
                return -EINVAL;

        if (!args->user_size)
                return -EINVAL;

        if (offset_in_page(args->user_ptr | args->user_size))
                return -EINVAL;

        if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
                return -EFAULT;

        if (args->flags & I915_USERPTR_READ_ONLY) {
                struct i915_hw_ppgtt *ppgtt;

                /*
                 * On almost all of the older hw, we cannot tell the GPU that
                 * a page is readonly.
                 */
                ppgtt = dev_priv->kernel_context->ppgtt;
                if (!ppgtt || !ppgtt->vm.has_read_only)
                        return -ENODEV;
        }

        obj = i915_gem_object_alloc(dev_priv);
        if (obj == NULL)
                return -ENOMEM;

        drm_gem_private_object_init(dev, &obj->base, args->user_size);
        i915_gem_object_init(obj, &i915_gem_userptr_ops);
        obj->read_domains = I915_GEM_DOMAIN_CPU;
        obj->write_domain = I915_GEM_DOMAIN_CPU;
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        obj->userptr.ptr = args->user_ptr;
        if (args->flags & I915_USERPTR_READ_ONLY)
                i915_gem_object_set_readonly(obj);

        /* And keep a pointer to the current->mm for resolving the user pages
         * at binding. This means that we need to hook into the mmu_notifier
         * in order to detect if the mmu is destroyed.
         */
        ret = i915_gem_userptr_init__mm_struct(obj);
        if (ret == 0)
                ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
        if (ret == 0)
                ret = drm_gem_handle_create(file, &obj->base, &handle);

        /* drop reference from allocate - handle holds it now */
        i915_gem_object_put(obj);
        if (ret)
                return ret;

        args->handle = handle;
        return 0;
}
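
/*
 * Illustrative userspace sketch (not part of this file): a hypothetical
 * caller fills struct drm_i915_gem_userptr with a page-aligned pointer and
 * size and issues DRM_IOCTL_I915_GEM_USERPTR, e.g. via libdrm's drmIoctl():
 *
 *	struct drm_i915_gem_userptr arg = {
 *		.user_ptr = (uintptr_t)ptr,
 *		.user_size = size,
 *		.flags = 0,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
 *		handle = arg.handle;
 *
 * On success, arg.handle is a GEM handle whose backing store is ptr.
 */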

int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
        mutex_init(&dev_priv->mm_lock);
        hash_init(dev_priv->mm_structs);

        dev_priv->mm.userptr_wq =
                alloc_workqueue("i915-userptr-acquire",
                                WQ_HIGHPRI | WQ_UNBOUND,
                                0);
        if (!dev_priv->mm.userptr_wq)
                return -ENOMEM;

        return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
        destroy_workqueue(dev_priv->mm.userptr_wq);
}