drivers/gpu/drm/i915/i915_gem.c
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drm_vma_manager.h>
29 #include <drm/drm_pci.h>
30 #include <drm/i915_drm.h>
31 #include <linux/dma-fence-array.h>
32 #include <linux/kthread.h>
33 #include <linux/reservation.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/slab.h>
36 #include <linux/stop_machine.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
40 #include <linux/mman.h>
41
42 #include "i915_drv.h"
43 #include "i915_gem_clflush.h"
44 #include "i915_gemfs.h"
45 #include "i915_globals.h"
46 #include "i915_reset.h"
47 #include "i915_trace.h"
48 #include "i915_vgpu.h"
49
50 #include "intel_drv.h"
51 #include "intel_frontbuffer.h"
52 #include "intel_mocs.h"
53 #include "intel_workarounds.h"
54
55 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
56
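/*
 * Decide whether a CPU write must be followed by a clflush: either the
 * object is not coherent for CPU writes, or it is pinned for global
 * (hardware) use and so has to be kept flushed. If the cache is already
 * tracked as dirty, the flush has been deferred and nothing more is
 * needed here.
 */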
57 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
58 {
59         if (obj->cache_dirty)
60                 return false;
61
62         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
63                 return true;
64
65         return obj->pin_global; /* currently in use by HW, keep flushed */
66 }
67
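/*
 * Reserve a node in the mappable range of the GGTT so that individual
 * pages can be bound on demand and accessed by the CPU through the
 * aperture (used by the pread/pwrite slow paths below).
 */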
68 static int
69 insert_mappable_node(struct i915_ggtt *ggtt,
70                      struct drm_mm_node *node, u32 size)
71 {
72         memset(node, 0, sizeof(*node));
73         return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
74                                            size, 0, I915_COLOR_UNEVICTABLE,
75                                            0, ggtt->mappable_end,
76                                            DRM_MM_INSERT_LOW);
77 }
78
79 static void
80 remove_mappable_node(struct drm_mm_node *node)
81 {
82         drm_mm_remove_node(node);
83 }
84
85 /* some bookkeeping */
86 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
87                                   u64 size)
88 {
89         spin_lock(&dev_priv->mm.object_stat_lock);
90         dev_priv->mm.object_count++;
91         dev_priv->mm.object_memory += size;
92         spin_unlock(&dev_priv->mm.object_stat_lock);
93 }
94
95 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
96                                      u64 size)
97 {
98         spin_lock(&dev_priv->mm.object_stat_lock);
99         dev_priv->mm.object_count--;
100         dev_priv->mm.object_memory -= size;
101         spin_unlock(&dev_priv->mm.object_stat_lock);
102 }
103
104 static void __i915_gem_park(struct drm_i915_private *i915)
105 {
106         intel_wakeref_t wakeref;
107
108         GEM_TRACE("\n");
109
110         lockdep_assert_held(&i915->drm.struct_mutex);
111         GEM_BUG_ON(i915->gt.active_requests);
112         GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
113
114         if (!i915->gt.awake)
115                 return;
116
117         /*
118          * Be paranoid and flush a concurrent interrupt to make sure
119          * we don't reactivate any irq tasklets after parking.
120          *
121          * FIXME: Note that even though we have waited for execlists to be idle,
122          * there may still be an in-flight interrupt even though the CSB
123          * is now empty. synchronize_irq() makes sure that a residual interrupt
124          * is completed before we continue, but it doesn't prevent the HW from
125          * raising a spurious interrupt later. To complete the shield we should
126          * coordinate disabling the CS irq with flushing the interrupts.
127          */
128         synchronize_irq(i915->drm.irq);
129
130         intel_engines_park(i915);
131         i915_timelines_park(i915);
132
133         i915_pmu_gt_parked(i915);
134         i915_vma_parked(i915);
135
136         wakeref = fetch_and_zero(&i915->gt.awake);
137         GEM_BUG_ON(!wakeref);
138
139         if (INTEL_GEN(i915) >= 6)
140                 gen6_rps_idle(i915);
141
142         intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
143
144         i915_globals_park();
145 }
146
147 void i915_gem_park(struct drm_i915_private *i915)
148 {
149         GEM_TRACE("\n");
150
151         lockdep_assert_held(&i915->drm.struct_mutex);
152         GEM_BUG_ON(i915->gt.active_requests);
153
154         if (!i915->gt.awake)
155                 return;
156
157         /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
158         mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
159 }
160
161 void i915_gem_unpark(struct drm_i915_private *i915)
162 {
163         GEM_TRACE("\n");
164
165         lockdep_assert_held(&i915->drm.struct_mutex);
166         GEM_BUG_ON(!i915->gt.active_requests);
167         assert_rpm_wakelock_held(i915);
168
169         if (i915->gt.awake)
170                 return;
171
172         /*
173          * It seems that the DMC likes to transition between the DC states a lot
174          * when there are no connected displays (no active power domains) during
175          * command submission.
176          *
177          * This activity has negative impact on the performance of the chip with
178          * huge latencies observed in the interrupt handler and elsewhere.
179          *
180          * Work around it by grabbing a GT IRQ power domain whilst there is any
181          * GT activity, preventing any DC state transitions.
182          */
183         i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
184         GEM_BUG_ON(!i915->gt.awake);
185
186         i915_globals_unpark();
187
188         intel_enable_gt_powersave(i915);
189         i915_update_gfx_val(i915);
190         if (INTEL_GEN(i915) >= 6)
191                 gen6_rps_busy(i915);
192         i915_pmu_gt_unparked(i915);
193
194         intel_engines_unpark(i915);
195
196         i915_queue_hangcheck(i915);
197
198         queue_delayed_work(i915->wq,
199                            &i915->gt.retire_work,
200                            round_jiffies_up_relative(HZ));
201 }
202
203 int
204 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
205                             struct drm_file *file)
206 {
207         struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
208         struct drm_i915_gem_get_aperture *args = data;
209         struct i915_vma *vma;
210         u64 pinned;
211
212         mutex_lock(&ggtt->vm.mutex);
213
214         pinned = ggtt->vm.reserved;
215         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
216                 if (i915_vma_is_pinned(vma))
217                         pinned += vma->node.size;
218
219         mutex_unlock(&ggtt->vm.mutex);
220
221         args->aper_size = ggtt->vm.total;
222         args->aper_available_size = args->aper_size - pinned;
223
224         return 0;
225 }
226
227 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
228 {
229         struct address_space *mapping = obj->base.filp->f_mapping;
230         drm_dma_handle_t *phys;
231         struct sg_table *st;
232         struct scatterlist *sg;
233         char *vaddr;
234         int i;
235         int err;
236
237         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
238                 return -EINVAL;
239
240         /* Always aligning to the object size allows a single allocation
241          * to handle all possible callers, and given typical object sizes,
242          * the alignment of the buddy allocation will naturally match.
243          */
244         phys = drm_pci_alloc(obj->base.dev,
245                              roundup_pow_of_two(obj->base.size),
246                              roundup_pow_of_two(obj->base.size));
247         if (!phys)
248                 return -ENOMEM;
249
250         vaddr = phys->vaddr;
251         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
252                 struct page *page;
253                 char *src;
254
255                 page = shmem_read_mapping_page(mapping, i);
256                 if (IS_ERR(page)) {
257                         err = PTR_ERR(page);
258                         goto err_phys;
259                 }
260
261                 src = kmap_atomic(page);
262                 memcpy(vaddr, src, PAGE_SIZE);
263                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
264                 kunmap_atomic(src);
265
266                 put_page(page);
267                 vaddr += PAGE_SIZE;
268         }
269
270         i915_gem_chipset_flush(to_i915(obj->base.dev));
271
272         st = kmalloc(sizeof(*st), GFP_KERNEL);
273         if (!st) {
274                 err = -ENOMEM;
275                 goto err_phys;
276         }
277
278         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
279                 kfree(st);
280                 err = -ENOMEM;
281                 goto err_phys;
282         }
283
284         sg = st->sgl;
285         sg->offset = 0;
286         sg->length = obj->base.size;
287
288         sg_dma_address(sg) = phys->busaddr;
289         sg_dma_len(sg) = obj->base.size;
290
291         obj->phys_handle = phys;
292
293         __i915_gem_object_set_pages(obj, st, sg->length);
294
295         return 0;
296
297 err_phys:
298         drm_pci_free(obj->base.dev, phys);
299
300         return err;
301 }
302
303 static void __start_cpu_write(struct drm_i915_gem_object *obj)
304 {
305         obj->read_domains = I915_GEM_DOMAIN_CPU;
306         obj->write_domain = I915_GEM_DOMAIN_CPU;
307         if (cpu_write_needs_clflush(obj))
308                 obj->cache_dirty = true;
309 }
310
311 static void
312 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
313                                 struct sg_table *pages,
314                                 bool needs_clflush)
315 {
316         GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
317
318         if (obj->mm.madv == I915_MADV_DONTNEED)
319                 obj->mm.dirty = false;
320
321         if (needs_clflush &&
322             (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
323             !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
324                 drm_clflush_sg(pages);
325
326         __start_cpu_write(obj);
327 }
328
329 static void
330 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
331                                struct sg_table *pages)
332 {
333         __i915_gem_object_release_shmem(obj, pages, false);
334
335         if (obj->mm.dirty) {
336                 struct address_space *mapping = obj->base.filp->f_mapping;
337                 char *vaddr = obj->phys_handle->vaddr;
338                 int i;
339
340                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
341                         struct page *page;
342                         char *dst;
343
344                         page = shmem_read_mapping_page(mapping, i);
345                         if (IS_ERR(page))
346                                 continue;
347
348                         dst = kmap_atomic(page);
349                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
350                         memcpy(dst, vaddr, PAGE_SIZE);
351                         kunmap_atomic(dst);
352
353                         set_page_dirty(page);
354                         if (obj->mm.madv == I915_MADV_WILLNEED)
355                                 mark_page_accessed(page);
356                         put_page(page);
357                         vaddr += PAGE_SIZE;
358                 }
359                 obj->mm.dirty = false;
360         }
361
362         sg_free_table(pages);
363         kfree(pages);
364
365         drm_pci_free(obj->base.dev, obj->phys_handle);
366 }
367
368 static void
369 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
370 {
371         i915_gem_object_unpin_pages(obj);
372 }
373
374 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
375         .get_pages = i915_gem_object_get_pages_phys,
376         .put_pages = i915_gem_object_put_pages_phys,
377         .release = i915_gem_object_release_phys,
378 };
379
380 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
381
382 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
383 {
384         struct i915_vma *vma;
385         LIST_HEAD(still_in_list);
386         int ret;
387
388         lockdep_assert_held(&obj->base.dev->struct_mutex);
389
390         /* Closed vma are removed from obj->vma.list - but they may still
391          * have an active binding on the object. To remove those we must
392          * wait for all rendering to the object to complete (as unbinding
393          * must anyway), and then retire the requests.
394          */
395         ret = i915_gem_object_set_to_cpu_domain(obj, false);
396         if (ret)
397                 return ret;
398
399         spin_lock(&obj->vma.lock);
400         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
401                                                        struct i915_vma,
402                                                        obj_link))) {
403                 list_move_tail(&vma->obj_link, &still_in_list);
404                 spin_unlock(&obj->vma.lock);
405
406                 ret = i915_vma_unbind(vma);
407
408                 spin_lock(&obj->vma.lock);
409         }
410         list_splice(&still_in_list, &obj->vma.list);
411         spin_unlock(&obj->vma.lock);
412
413         return ret;
414 }
415
416 static long
417 i915_gem_object_wait_fence(struct dma_fence *fence,
418                            unsigned int flags,
419                            long timeout)
420 {
421         struct i915_request *rq;
422
423         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
424
425         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
426                 return timeout;
427
428         if (!dma_fence_is_i915(fence))
429                 return dma_fence_wait_timeout(fence,
430                                               flags & I915_WAIT_INTERRUPTIBLE,
431                                               timeout);
432
433         rq = to_request(fence);
434         if (i915_request_completed(rq))
435                 goto out;
436
437         timeout = i915_request_wait(rq, flags, timeout);
438
439 out:
440         if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
441                 i915_request_retire_upto(rq);
442
443         return timeout;
444 }
445
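/*
 * Wait on the fences tracked by the reservation object: only the
 * exclusive fence, or every shared fence as well if I915_WAIT_ALL is
 * set. If everything has signaled, the fence array is opportunistically
 * pruned. Returns the remaining timeout, or a negative error code.
 */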
446 static long
447 i915_gem_object_wait_reservation(struct reservation_object *resv,
448                                  unsigned int flags,
449                                  long timeout)
450 {
451         unsigned int seq = __read_seqcount_begin(&resv->seq);
452         struct dma_fence *excl;
453         bool prune_fences = false;
454
455         if (flags & I915_WAIT_ALL) {
456                 struct dma_fence **shared;
457                 unsigned int count, i;
458                 int ret;
459
460                 ret = reservation_object_get_fences_rcu(resv,
461                                                         &excl, &count, &shared);
462                 if (ret)
463                         return ret;
464
465                 for (i = 0; i < count; i++) {
466                         timeout = i915_gem_object_wait_fence(shared[i],
467                                                              flags, timeout);
468                         if (timeout < 0)
469                                 break;
470
471                         dma_fence_put(shared[i]);
472                 }
473
474                 for (; i < count; i++)
475                         dma_fence_put(shared[i]);
476                 kfree(shared);
477
478                 /*
479                  * If both shared fences and an exclusive fence exist,
480                  * then by construction the shared fences must be later
481                  * than the exclusive fence. If we successfully wait for
482                  * all the shared fences, we know that the exclusive fence
483                  * must also be signaled. If all the shared fences are
484                  * signaled, we can prune the array and recover the
485                  * floating references on the fences/requests.
486                  */
487                 prune_fences = count && timeout >= 0;
488         } else {
489                 excl = reservation_object_get_excl_rcu(resv);
490         }
491
492         if (excl && timeout >= 0)
493                 timeout = i915_gem_object_wait_fence(excl, flags, timeout);
494
495         dma_fence_put(excl);
496
497         /*
498          * Opportunistically prune the fences iff we know they have *all* been
499          * signaled and that the reservation object has not been changed (i.e.
500          * no new fences have been added).
501          */
502         if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
503                 if (reservation_object_trylock(resv)) {
504                         if (!__read_seqcount_retry(&resv->seq, seq))
505                                 reservation_object_add_excl_fence(resv, NULL);
506                         reservation_object_unlock(resv);
507                 }
508         }
509
510         return timeout;
511 }
512
513 static void __fence_set_priority(struct dma_fence *fence,
514                                  const struct i915_sched_attr *attr)
515 {
516         struct i915_request *rq;
517         struct intel_engine_cs *engine;
518
519         if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
520                 return;
521
522         rq = to_request(fence);
523         engine = rq->engine;
524
525         local_bh_disable();
526         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
527         if (engine->schedule)
528                 engine->schedule(rq, attr);
529         rcu_read_unlock();
530         local_bh_enable(); /* kick the tasklets if queues were reprioritised */
531 }
532
533 static void fence_set_priority(struct dma_fence *fence,
534                                const struct i915_sched_attr *attr)
535 {
536         /* Recurse once into a fence-array */
537         if (dma_fence_is_array(fence)) {
538                 struct dma_fence_array *array = to_dma_fence_array(fence);
539                 int i;
540
541                 for (i = 0; i < array->num_fences; i++)
542                         __fence_set_priority(array->fences[i], attr);
543         } else {
544                 __fence_set_priority(fence, attr);
545         }
546 }
547
548 int
549 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
550                               unsigned int flags,
551                               const struct i915_sched_attr *attr)
552 {
553         struct dma_fence *excl;
554
555         if (flags & I915_WAIT_ALL) {
556                 struct dma_fence **shared;
557                 unsigned int count, i;
558                 int ret;
559
560                 ret = reservation_object_get_fences_rcu(obj->resv,
561                                                         &excl, &count, &shared);
562                 if (ret)
563                         return ret;
564
565                 for (i = 0; i < count; i++) {
566                         fence_set_priority(shared[i], attr);
567                         dma_fence_put(shared[i]);
568                 }
569
570                 kfree(shared);
571         } else {
572                 excl = reservation_object_get_excl_rcu(obj->resv);
573         }
574
575         if (excl) {
576                 fence_set_priority(excl, attr);
577                 dma_fence_put(excl);
578         }
579         return 0;
580 }
581
582 /**
583  * Waits for rendering to the object to be completed
584  * @obj: i915 gem object
585  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
586  * @timeout: how long to wait
587  */
588 int
589 i915_gem_object_wait(struct drm_i915_gem_object *obj,
590                      unsigned int flags,
591                      long timeout)
592 {
593         might_sleep();
594         GEM_BUG_ON(timeout < 0);
595
596         timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
597         return timeout < 0 ? timeout : 0;
598 }
599
600 static int
601 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
602                      struct drm_i915_gem_pwrite *args,
603                      struct drm_file *file)
604 {
605         void *vaddr = obj->phys_handle->vaddr + args->offset;
606         char __user *user_data = u64_to_user_ptr(args->data_ptr);
607
608         /* We manually control the domain here and pretend that it
609          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
610          */
611         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
612         if (copy_from_user(vaddr, user_data, args->size))
613                 return -EFAULT;
614
615         drm_clflush_virt_range(vaddr, args->size);
616         i915_gem_chipset_flush(to_i915(obj->base.dev));
617
618         intel_fb_obj_flush(obj, ORIGIN_CPU);
619         return 0;
620 }
621
622 static int
623 i915_gem_create(struct drm_file *file,
624                 struct drm_i915_private *dev_priv,
625                 u64 size,
626                 u32 *handle_p)
627 {
628         struct drm_i915_gem_object *obj;
629         int ret;
630         u32 handle;
631
632         size = roundup(size, PAGE_SIZE);
633         if (size == 0)
634                 return -EINVAL;
635
636         /* Allocate the new object */
637         obj = i915_gem_object_create(dev_priv, size);
638         if (IS_ERR(obj))
639                 return PTR_ERR(obj);
640
641         ret = drm_gem_handle_create(file, &obj->base, &handle);
642         /* drop reference from allocate - handle holds it now */
643         i915_gem_object_put(obj);
644         if (ret)
645                 return ret;
646
647         *handle_p = handle;
648         return 0;
649 }
650
651 int
652 i915_gem_dumb_create(struct drm_file *file,
653                      struct drm_device *dev,
654                      struct drm_mode_create_dumb *args)
655 {
656         /* have to work out size/pitch and return them */
657         args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
658         args->size = args->pitch * args->height;
659         return i915_gem_create(file, to_i915(dev),
660                                args->size, &args->handle);
661 }
662
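/*
 * GPU writes to a cacheable (neither uncached nor write-through) object
 * may land in the CPU cache rather than memory, so such objects are
 * marked cache_dirty when flushed out of the render domain.
 */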
663 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
664 {
665         return !(obj->cache_level == I915_CACHE_NONE ||
666                  obj->cache_level == I915_CACHE_WT);
667 }
668
669 /**
670  * Creates a new mm object and returns a handle to it.
671  * @dev: drm device pointer
672  * @data: ioctl data blob
673  * @file: drm file pointer
674  */
675 int
676 i915_gem_create_ioctl(struct drm_device *dev, void *data,
677                       struct drm_file *file)
678 {
679         struct drm_i915_private *dev_priv = to_i915(dev);
680         struct drm_i915_gem_create *args = data;
681
682         i915_gem_flush_free_objects(dev_priv);
683
684         return i915_gem_create(file, dev_priv,
685                                args->size, &args->handle);
686 }
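/*
 * Userspace reaches the ioctl above via DRM_IOCTL_I915_GEM_CREATE. A
 * minimal sketch, assuming an open i915 DRM fd and libdrm's drmIoctl()
 * wrapper (use_handle() is only a placeholder for the caller's code):
 *
 *	struct drm_i915_gem_create create = { .size = 2 * 4096 };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
 *		use_handle(create.handle);
 */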
687
688 static inline enum fb_op_origin
689 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
690 {
691         return (domain == I915_GEM_DOMAIN_GTT ?
692                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
693 }
694
695 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
696 {
697         intel_wakeref_t wakeref;
698
699         /*
700          * No actual flushing is required for the GTT write domain for reads
701          * from the GTT domain. Writes to it "immediately" go to main memory
702          * as far as we know, so there's no chipset flush. It also doesn't
703          * land in the GPU render cache.
704          *
705          * However, we do have to enforce the order so that all writes through
706          * the GTT land before any writes to the device, such as updates to
707          * the GATT itself.
708          *
709          * We also have to wait a bit for the writes to land from the GTT.
710          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
711          * timing. This issue has only been observed when switching quickly
712          * between GTT writes and CPU reads from inside the kernel on recent hw,
713          * and it appears to only affect discrete GTT blocks (i.e. on LLC
714          * system agents we could not reproduce this behaviour - until
715          * Cannonlake, that is!).
716          */
717
718         wmb();
719
720         if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
721                 return;
722
723         i915_gem_chipset_flush(dev_priv);
724
725         with_intel_runtime_pm(dev_priv, wakeref) {
726                 spin_lock_irq(&dev_priv->uncore.lock);
727
728                 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
729
730                 spin_unlock_irq(&dev_priv->uncore.lock);
731         }
732 }
733
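/*
 * Flush the object's single outstanding write domain if it is covered by
 * flush_domains: a GGTT flush for GTT writes, a write barrier for WC
 * writes, a clflush for CPU writes, and for GPU (render) writes simply
 * note that the CPU cache may now be dirty.
 */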
734 static void
735 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
736 {
737         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
738         struct i915_vma *vma;
739
740         if (!(obj->write_domain & flush_domains))
741                 return;
742
743         switch (obj->write_domain) {
744         case I915_GEM_DOMAIN_GTT:
745                 i915_gem_flush_ggtt_writes(dev_priv);
746
747                 intel_fb_obj_flush(obj,
748                                    fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
749
750                 for_each_ggtt_vma(vma, obj) {
751                         if (vma->iomap)
752                                 continue;
753
754                         i915_vma_unset_ggtt_write(vma);
755                 }
756                 break;
757
758         case I915_GEM_DOMAIN_WC:
759                 wmb();
760                 break;
761
762         case I915_GEM_DOMAIN_CPU:
763                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
764                 break;
765
766         case I915_GEM_DOMAIN_RENDER:
767                 if (gpu_write_needs_clflush(obj))
768                         obj->cache_dirty = true;
769                 break;
770         }
771
772         obj->write_domain = 0;
773 }
774
775 /*
776  * Pins the specified object's pages and synchronizes the object with
777  * GPU accesses. Sets needs_clflush to non-zero if the caller should
778  * flush the object from the CPU cache.
779  */
780 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
781                                     unsigned int *needs_clflush)
782 {
783         int ret;
784
785         lockdep_assert_held(&obj->base.dev->struct_mutex);
786
787         *needs_clflush = 0;
788         if (!i915_gem_object_has_struct_page(obj))
789                 return -ENODEV;
790
791         ret = i915_gem_object_wait(obj,
792                                    I915_WAIT_INTERRUPTIBLE |
793                                    I915_WAIT_LOCKED,
794                                    MAX_SCHEDULE_TIMEOUT);
795         if (ret)
796                 return ret;
797
798         ret = i915_gem_object_pin_pages(obj);
799         if (ret)
800                 return ret;
801
802         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
803             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
804                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
805                 if (ret)
806                         goto err_unpin;
807                 else
808                         goto out;
809         }
810
811         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
812
813         /* If we're not in the cpu read domain, set ourselves into the gtt
814          * read domain and manually flush cachelines (if required). This
815          * optimizes for the case when the gpu will dirty the data
816          * anyway again before the next pread happens.
817          */
818         if (!obj->cache_dirty &&
819             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
820                 *needs_clflush = CLFLUSH_BEFORE;
821
822 out:
823         /* return with the pages pinned */
824         return 0;
825
826 err_unpin:
827         i915_gem_object_unpin_pages(obj);
828         return ret;
829 }
830
831 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
832                                      unsigned int *needs_clflush)
833 {
834         int ret;
835
836         lockdep_assert_held(&obj->base.dev->struct_mutex);
837
838         *needs_clflush = 0;
839         if (!i915_gem_object_has_struct_page(obj))
840                 return -ENODEV;
841
842         ret = i915_gem_object_wait(obj,
843                                    I915_WAIT_INTERRUPTIBLE |
844                                    I915_WAIT_LOCKED |
845                                    I915_WAIT_ALL,
846                                    MAX_SCHEDULE_TIMEOUT);
847         if (ret)
848                 return ret;
849
850         ret = i915_gem_object_pin_pages(obj);
851         if (ret)
852                 return ret;
853
854         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
855             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
856                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
857                 if (ret)
858                         goto err_unpin;
859                 else
860                         goto out;
861         }
862
863         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
864
865         /* If we're not in the cpu write domain, set ourselves into the
866          * gtt write domain and manually flush cachelines (as required).
867          * This optimizes for the case when the gpu will use the data
868          * right away and we therefore have to clflush anyway.
869          */
870         if (!obj->cache_dirty) {
871                 *needs_clflush |= CLFLUSH_AFTER;
872
873                 /*
874                  * Same trick applies to invalidate partially written
875                  * cachelines read before writing.
876                  */
877                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
878                         *needs_clflush |= CLFLUSH_BEFORE;
879         }
880
881 out:
882         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
883         obj->mm.dirty = true;
884         /* return with the pages pinned */
885         return 0;
886
887 err_unpin:
888         i915_gem_object_unpin_pages(obj);
889         return ret;
890 }
891
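/* Per-page copy function for the shmem pread fastpath.
 * Flushes the cachelines of the source page before copying to userspace
 * if needs_clflush is set.
 */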
892 static int
893 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
894             bool needs_clflush)
895 {
896         char *vaddr;
897         int ret;
898
899         vaddr = kmap(page);
900
901         if (needs_clflush)
902                 drm_clflush_virt_range(vaddr + offset, len);
903
904         ret = __copy_to_user(user_data, vaddr + offset, len);
905
906         kunmap(page);
907
908         return ret ? -EFAULT : 0;
909 }
910
911 static int
912 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
913                      struct drm_i915_gem_pread *args)
914 {
915         char __user *user_data;
916         u64 remain;
917         unsigned int needs_clflush;
918         unsigned int idx, offset;
919         int ret;
920
921         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
922         if (ret)
923                 return ret;
924
925         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
926         mutex_unlock(&obj->base.dev->struct_mutex);
927         if (ret)
928                 return ret;
929
930         remain = args->size;
931         user_data = u64_to_user_ptr(args->data_ptr);
932         offset = offset_in_page(args->offset);
933         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
934                 struct page *page = i915_gem_object_get_page(obj, idx);
935                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
936
937                 ret = shmem_pread(page, offset, length, user_data,
938                                   needs_clflush);
939                 if (ret)
940                         break;
941
942                 remain -= length;
943                 user_data += length;
944                 offset = 0;
945         }
946
947         i915_gem_obj_finish_shmem_access(obj);
948         return ret;
949 }
950
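/*
 * Copy from the GGTT aperture to userspace: try an atomic WC mapping
 * first and, if the user buffer faults, retry with a regular mapping and
 * a sleeping copy_to_user().
 */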
951 static inline bool
952 gtt_user_read(struct io_mapping *mapping,
953               loff_t base, int offset,
954               char __user *user_data, int length)
955 {
956         void __iomem *vaddr;
957         unsigned long unwritten;
958
959         /* We can use the cpu mem copy function because this is X86. */
960         vaddr = io_mapping_map_atomic_wc(mapping, base);
961         unwritten = __copy_to_user_inatomic(user_data,
962                                             (void __force *)vaddr + offset,
963                                             length);
964         io_mapping_unmap_atomic(vaddr);
965         if (unwritten) {
966                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
967                 unwritten = copy_to_user(user_data,
968                                          (void __force *)vaddr + offset,
969                                          length);
970                 io_mapping_unmap(vaddr);
971         }
972         return unwritten;
973 }
974
975 static int
976 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
977                    const struct drm_i915_gem_pread *args)
978 {
979         struct drm_i915_private *i915 = to_i915(obj->base.dev);
980         struct i915_ggtt *ggtt = &i915->ggtt;
981         intel_wakeref_t wakeref;
982         struct drm_mm_node node;
983         struct i915_vma *vma;
984         void __user *user_data;
985         u64 remain, offset;
986         int ret;
987
988         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
989         if (ret)
990                 return ret;
991
992         wakeref = intel_runtime_pm_get(i915);
993         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
994                                        PIN_MAPPABLE |
995                                        PIN_NONFAULT |
996                                        PIN_NONBLOCK);
997         if (!IS_ERR(vma)) {
998                 node.start = i915_ggtt_offset(vma);
999                 node.allocated = false;
1000                 ret = i915_vma_put_fence(vma);
1001                 if (ret) {
1002                         i915_vma_unpin(vma);
1003                         vma = ERR_PTR(ret);
1004                 }
1005         }
1006         if (IS_ERR(vma)) {
1007                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1008                 if (ret)
1009                         goto out_unlock;
1010                 GEM_BUG_ON(!node.allocated);
1011         }
1012
1013         ret = i915_gem_object_set_to_gtt_domain(obj, false);
1014         if (ret)
1015                 goto out_unpin;
1016
1017         mutex_unlock(&i915->drm.struct_mutex);
1018
1019         user_data = u64_to_user_ptr(args->data_ptr);
1020         remain = args->size;
1021         offset = args->offset;
1022
1023         while (remain > 0) {
1024                 /* Operation in this page
1025                  *
1026                  * page_base = page offset within aperture
1027                  * page_offset = offset within page
1028                  * page_length = bytes to copy for this page
1029                  */
1030                 u32 page_base = node.start;
1031                 unsigned page_offset = offset_in_page(offset);
1032                 unsigned page_length = PAGE_SIZE - page_offset;
1033                 page_length = remain < page_length ? remain : page_length;
1034                 if (node.allocated) {
1035                         wmb();
1036                         ggtt->vm.insert_page(&ggtt->vm,
1037                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1038                                              node.start, I915_CACHE_NONE, 0);
1039                         wmb();
1040                 } else {
1041                         page_base += offset & PAGE_MASK;
1042                 }
1043
1044                 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
1045                                   user_data, page_length)) {
1046                         ret = -EFAULT;
1047                         break;
1048                 }
1049
1050                 remain -= page_length;
1051                 user_data += page_length;
1052                 offset += page_length;
1053         }
1054
1055         mutex_lock(&i915->drm.struct_mutex);
1056 out_unpin:
1057         if (node.allocated) {
1058                 wmb();
1059                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1060                 remove_mappable_node(&node);
1061         } else {
1062                 i915_vma_unpin(vma);
1063         }
1064 out_unlock:
1065         intel_runtime_pm_put(i915, wakeref);
1066         mutex_unlock(&i915->drm.struct_mutex);
1067
1068         return ret;
1069 }
1070
1071 /**
1072  * Reads data from the object referenced by handle.
1073  * @dev: drm device pointer
1074  * @data: ioctl data blob
1075  * @file: drm file pointer
1076  *
1077  * On error, the contents of *data are undefined.
1078  */
1079 int
1080 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1081                      struct drm_file *file)
1082 {
1083         struct drm_i915_gem_pread *args = data;
1084         struct drm_i915_gem_object *obj;
1085         int ret;
1086
1087         if (args->size == 0)
1088                 return 0;
1089
1090         if (!access_ok(u64_to_user_ptr(args->data_ptr),
1091                        args->size))
1092                 return -EFAULT;
1093
1094         obj = i915_gem_object_lookup(file, args->handle);
1095         if (!obj)
1096                 return -ENOENT;
1097
1098         /* Bounds check source.  */
1099         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1100                 ret = -EINVAL;
1101                 goto out;
1102         }
1103
1104         trace_i915_gem_object_pread(obj, args->offset, args->size);
1105
1106         ret = i915_gem_object_wait(obj,
1107                                    I915_WAIT_INTERRUPTIBLE,
1108                                    MAX_SCHEDULE_TIMEOUT);
1109         if (ret)
1110                 goto out;
1111
1112         ret = i915_gem_object_pin_pages(obj);
1113         if (ret)
1114                 goto out;
1115
1116         ret = i915_gem_shmem_pread(obj, args);
1117         if (ret == -EFAULT || ret == -ENODEV)
1118                 ret = i915_gem_gtt_pread(obj, args);
1119
1120         i915_gem_object_unpin_pages(obj);
1121 out:
1122         i915_gem_object_put(obj);
1123         return ret;
1124 }
1125
1126 /* This is the fast write path which cannot handle
1127  * page faults in the source data
1128  */
1129
1130 static inline bool
1131 ggtt_write(struct io_mapping *mapping,
1132            loff_t base, int offset,
1133            char __user *user_data, int length)
1134 {
1135         void __iomem *vaddr;
1136         unsigned long unwritten;
1137
1138         /* We can use the cpu mem copy function because this is X86. */
1139         vaddr = io_mapping_map_atomic_wc(mapping, base);
1140         unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1141                                                       user_data, length);
1142         io_mapping_unmap_atomic(vaddr);
1143         if (unwritten) {
1144                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1145                 unwritten = copy_from_user((void __force *)vaddr + offset,
1146                                            user_data, length);
1147                 io_mapping_unmap(vaddr);
1148         }
1149
1150         return unwritten;
1151 }
1152
1153 /**
1154  * This is the fast pwrite path, where we copy the data directly from the
1155  * user into the GTT, uncached.
1156  * @obj: i915 GEM object
1157  * @args: pwrite arguments structure
1158  */
1159 static int
1160 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1161                          const struct drm_i915_gem_pwrite *args)
1162 {
1163         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1164         struct i915_ggtt *ggtt = &i915->ggtt;
1165         intel_wakeref_t wakeref;
1166         struct drm_mm_node node;
1167         struct i915_vma *vma;
1168         u64 remain, offset;
1169         void __user *user_data;
1170         int ret;
1171
1172         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1173         if (ret)
1174                 return ret;
1175
1176         if (i915_gem_object_has_struct_page(obj)) {
1177                 /*
1178                  * Avoid waking the device up if we can fall back, as
1179                  * waking/resuming is very slow (worst-case 10-100 ms
1180                  * depending on PCI sleeps and our own resume time).
1181                  * This easily dwarfs any performance advantage from
1182                  * using the cache bypass of indirect GGTT access.
1183                  */
1184                 wakeref = intel_runtime_pm_get_if_in_use(i915);
1185                 if (!wakeref) {
1186                         ret = -EFAULT;
1187                         goto out_unlock;
1188                 }
1189         } else {
1190                 /* No backing pages, no fallback, we must force GGTT access */
1191                 wakeref = intel_runtime_pm_get(i915);
1192         }
1193
1194         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1195                                        PIN_MAPPABLE |
1196                                        PIN_NONFAULT |
1197                                        PIN_NONBLOCK);
1198         if (!IS_ERR(vma)) {
1199                 node.start = i915_ggtt_offset(vma);
1200                 node.allocated = false;
1201                 ret = i915_vma_put_fence(vma);
1202                 if (ret) {
1203                         i915_vma_unpin(vma);
1204                         vma = ERR_PTR(ret);
1205                 }
1206         }
1207         if (IS_ERR(vma)) {
1208                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1209                 if (ret)
1210                         goto out_rpm;
1211                 GEM_BUG_ON(!node.allocated);
1212         }
1213
1214         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1215         if (ret)
1216                 goto out_unpin;
1217
1218         mutex_unlock(&i915->drm.struct_mutex);
1219
1220         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1221
1222         user_data = u64_to_user_ptr(args->data_ptr);
1223         offset = args->offset;
1224         remain = args->size;
1225         while (remain) {
1226                 /* Operation in this page
1227                  *
1228                  * page_base = page offset within aperture
1229                  * page_offset = offset within page
1230                  * page_length = bytes to copy for this page
1231                  */
1232                 u32 page_base = node.start;
1233                 unsigned int page_offset = offset_in_page(offset);
1234                 unsigned int page_length = PAGE_SIZE - page_offset;
1235                 page_length = remain < page_length ? remain : page_length;
1236                 if (node.allocated) {
1237                         wmb(); /* flush the write before we modify the GGTT */
1238                         ggtt->vm.insert_page(&ggtt->vm,
1239                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1240                                              node.start, I915_CACHE_NONE, 0);
1241                         wmb(); /* flush modifications to the GGTT (insert_page) */
1242                 } else {
1243                         page_base += offset & PAGE_MASK;
1244                 }
1245                 /* If we get a fault while copying data, then (presumably) our
1246                  * source page isn't available.  Return the error and we'll
1247                  * retry in the slow path.
1248                  * If the object is non-shmem backed, we retry with the path
1249                  * that handles page faults.
1250                  */
1251                 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1252                                user_data, page_length)) {
1253                         ret = -EFAULT;
1254                         break;
1255                 }
1256
1257                 remain -= page_length;
1258                 user_data += page_length;
1259                 offset += page_length;
1260         }
1261         intel_fb_obj_flush(obj, ORIGIN_CPU);
1262
1263         mutex_lock(&i915->drm.struct_mutex);
1264 out_unpin:
1265         if (node.allocated) {
1266                 wmb();
1267                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1268                 remove_mappable_node(&node);
1269         } else {
1270                 i915_vma_unpin(vma);
1271         }
1272 out_rpm:
1273         intel_runtime_pm_put(i915, wakeref);
1274 out_unlock:
1275         mutex_unlock(&i915->drm.struct_mutex);
1276         return ret;
1277 }
1278
1279 /* Per-page copy function for the shmem pwrite fastpath.
1280  * Flushes invalid cachelines before writing to the target if
1281  * needs_clflush_before is set and flushes out any written cachelines after
1282  * writing if needs_clflush is set.
1283  * writing if needs_clflush_after is set.
1284 static int
1285 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1286              bool needs_clflush_before,
1287              bool needs_clflush_after)
1288 {
1289         char *vaddr;
1290         int ret;
1291
1292         vaddr = kmap(page);
1293
1294         if (needs_clflush_before)
1295                 drm_clflush_virt_range(vaddr + offset, len);
1296
1297         ret = __copy_from_user(vaddr + offset, user_data, len);
1298         if (!ret && needs_clflush_after)
1299                 drm_clflush_virt_range(vaddr + offset, len);
1300
1301         kunmap(page);
1302
1303         return ret ? -EFAULT : 0;
1304 }
1305
1306 static int
1307 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1308                       const struct drm_i915_gem_pwrite *args)
1309 {
1310         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1311         void __user *user_data;
1312         u64 remain;
1313         unsigned int partial_cacheline_write;
1314         unsigned int needs_clflush;
1315         unsigned int offset, idx;
1316         int ret;
1317
1318         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1319         if (ret)
1320                 return ret;
1321
1322         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1323         mutex_unlock(&i915->drm.struct_mutex);
1324         if (ret)
1325                 return ret;
1326
1327         /* If we don't overwrite a cacheline completely we need to be
1328          * careful to have up-to-date data by first clflushing. Don't
1329          * overcomplicate things - just flush the entire write range.
1330          */
1331         partial_cacheline_write = 0;
1332         if (needs_clflush & CLFLUSH_BEFORE)
1333                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1334
1335         user_data = u64_to_user_ptr(args->data_ptr);
1336         remain = args->size;
1337         offset = offset_in_page(args->offset);
1338         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1339                 struct page *page = i915_gem_object_get_page(obj, idx);
1340                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1341
1342                 ret = shmem_pwrite(page, offset, length, user_data,
1343                                    (offset | length) & partial_cacheline_write,
1344                                    needs_clflush & CLFLUSH_AFTER);
1345                 if (ret)
1346                         break;
1347
1348                 remain -= length;
1349                 user_data += length;
1350                 offset = 0;
1351         }
1352
1353         intel_fb_obj_flush(obj, ORIGIN_CPU);
1354         i915_gem_obj_finish_shmem_access(obj);
1355         return ret;
1356 }
1357
1358 /**
1359  * Writes data to the object referenced by handle.
1360  * @dev: drm device
1361  * @data: ioctl data blob
1362  * @file: drm file
1363  *
1364  * On error, the contents of the buffer that were to be modified are undefined.
1365  */
1366 int
1367 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1368                       struct drm_file *file)
1369 {
1370         struct drm_i915_gem_pwrite *args = data;
1371         struct drm_i915_gem_object *obj;
1372         int ret;
1373
1374         if (args->size == 0)
1375                 return 0;
1376
1377         if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1378                 return -EFAULT;
1379
1380         obj = i915_gem_object_lookup(file, args->handle);
1381         if (!obj)
1382                 return -ENOENT;
1383
1384         /* Bounds check destination. */
1385         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1386                 ret = -EINVAL;
1387                 goto err;
1388         }
1389
1390         /* Writes not allowed into this read-only object */
1391         if (i915_gem_object_is_readonly(obj)) {
1392                 ret = -EINVAL;
1393                 goto err;
1394         }
1395
1396         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1397
1398         ret = -ENODEV;
1399         if (obj->ops->pwrite)
1400                 ret = obj->ops->pwrite(obj, args);
1401         if (ret != -ENODEV)
1402                 goto err;
1403
1404         ret = i915_gem_object_wait(obj,
1405                                    I915_WAIT_INTERRUPTIBLE |
1406                                    I915_WAIT_ALL,
1407                                    MAX_SCHEDULE_TIMEOUT);
1408         if (ret)
1409                 goto err;
1410
1411         ret = i915_gem_object_pin_pages(obj);
1412         if (ret)
1413                 goto err;
1414
1415         ret = -EFAULT;
1416         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1417          * it would end up going through the fenced access, and we'll get
1418          * different detiling behavior between reading and writing.
1419          * pread/pwrite currently are reading and writing from the CPU
1420          * perspective, requiring manual detiling by the client.
1421          */
1422         if (!i915_gem_object_has_struct_page(obj) ||
1423             cpu_write_needs_clflush(obj))
1424                 /* Note that the gtt paths might fail with non-page-backed user
1425                  * pointers (e.g. gtt mappings when moving data between
1426                  * textures). Fallback to the shmem path in that case.
1427                  * textures). Fall back to the shmem path in that case.
1428                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1429
1430         if (ret == -EFAULT || ret == -ENOSPC) {
1431                 if (obj->phys_handle)
1432                         ret = i915_gem_phys_pwrite(obj, args, file);
1433                 else
1434                         ret = i915_gem_shmem_pwrite(obj, args);
1435         }
1436
1437         i915_gem_object_unpin_pages(obj);
1438 err:
1439         i915_gem_object_put(obj);
1440         return ret;
1441 }
1442
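/*
 * Move the object's bound GGTT vmas, and the object itself, to the tail
 * of their LRU lists so that recently used buffers are the last to be
 * considered for eviction and shrinking.
 */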
1443 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1444 {
1445         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1446         struct list_head *list;
1447         struct i915_vma *vma;
1448
1449         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1450
1451         mutex_lock(&i915->ggtt.vm.mutex);
1452         for_each_ggtt_vma(vma, obj) {
1453                 if (!drm_mm_node_allocated(&vma->node))
1454                         continue;
1455
1456                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1457         }
1458         mutex_unlock(&i915->ggtt.vm.mutex);
1459
1460         spin_lock(&i915->mm.obj_lock);
1461         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1462         list_move_tail(&obj->mm.link, list);
1463         spin_unlock(&i915->mm.obj_lock);
1464 }
1465
1466 /**
1467  * Called when user space prepares to use an object with the CPU, either
1468  * through the mmap ioctl's mapping or a GTT mapping.
1469  * @dev: drm device
1470  * @data: ioctl data blob
1471  * @file: drm file
1472  */
1473 int
1474 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1475                           struct drm_file *file)
1476 {
1477         struct drm_i915_gem_set_domain *args = data;
1478         struct drm_i915_gem_object *obj;
1479         u32 read_domains = args->read_domains;
1480         u32 write_domain = args->write_domain;
1481         int err;
1482
1483         /* Only handle setting domains to types used by the CPU. */
1484         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1485                 return -EINVAL;
1486
1487         /*
1488          * Having something in the write domain implies it's in the read
1489          * domain, and only that read domain.  Enforce that in the request.
1490          */
1491         if (write_domain && read_domains != write_domain)
1492                 return -EINVAL;
1493
1494         if (!read_domains)
1495                 return 0;
1496
1497         obj = i915_gem_object_lookup(file, args->handle);
1498         if (!obj)
1499                 return -ENOENT;
1500
1501         /*
1502          * Already in the desired write domain? Nothing for us to do!
1503          *
1504          * We apply a little bit of cunning here to catch a broader set of
1505          * no-ops. If obj->write_domain is set, we must be in the same
1506          * obj->read_domains, and only that domain. Therefore, if that
1507          * obj->write_domain matches the request read_domains, we are
1508          * already in the same read/write domain and can skip the operation,
1509          * without having to further check the requested write_domain.
1510          */
1511         if (READ_ONCE(obj->write_domain) == read_domains) {
1512                 err = 0;
1513                 goto out;
1514         }
1515
1516         /*
1517          * Try to flush the object off the GPU without holding the lock.
1518          * We will repeat the flush holding the lock in the normal manner
1519          * to catch cases where we are gazumped.
1520          */
1521         err = i915_gem_object_wait(obj,
1522                                    I915_WAIT_INTERRUPTIBLE |
1523                                    I915_WAIT_PRIORITY |
1524                                    (write_domain ? I915_WAIT_ALL : 0),
1525                                    MAX_SCHEDULE_TIMEOUT);
1526         if (err)
1527                 goto out;
1528
1529         /*
1530          * Proxy objects do not control access to the backing storage, ergo
1531          * they cannot be used as a means to manipulate the cache domain
1532          * tracking for that backing storage. The proxy object is always
1533          * considered to be outside of any cache domain.
1534          */
1535         if (i915_gem_object_is_proxy(obj)) {
1536                 err = -ENXIO;
1537                 goto out;
1538         }
1539
1540         /*
1541          * Flush and acquire obj->pages so that we are coherent through
1542          * direct access in memory with previous cached writes through
1543          * shmemfs and that our cache domain tracking remains valid.
1544          * For example, if the obj->filp was moved to swap without us
1545          * being notified and releasing the pages, we would mistakenly
1546          * continue to assume that the obj remained out of the CPU cached
1547          * domain.
1548          */
1549         err = i915_gem_object_pin_pages(obj);
1550         if (err)
1551                 goto out;
1552
1553         err = i915_mutex_lock_interruptible(dev);
1554         if (err)
1555                 goto out_unpin;
1556
1557         if (read_domains & I915_GEM_DOMAIN_WC)
1558                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1559         else if (read_domains & I915_GEM_DOMAIN_GTT)
1560                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1561         else
1562                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1563
1564         /* And bump the LRU for this access */
1565         i915_gem_object_bump_inactive_ggtt(obj);
1566
1567         mutex_unlock(&dev->struct_mutex);
1568
1569         if (write_domain != 0)
1570                 intel_fb_obj_invalidate(obj,
1571                                         fb_write_origin(obj, write_domain));
1572
1573 out_unpin:
1574         i915_gem_object_unpin_pages(obj);
1575 out:
1576         i915_gem_object_put(obj);
1577         return err;
1578 }
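
/*
 * Illustrative userspace sketch (an assumption, not part of this file): a
 * typical caller moves a buffer into the CPU domain before writing to it
 * through a CPU mmap. Assumes libdrm's <xf86drm.h> and <drm/i915_drm.h>,
 * an open DRM fd and a valid GEM handle; error handling elided.
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
 *		perror("set_domain");
 */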
1579
1580 /**
1581  * i915_gem_sw_finish_ioctl - called when user space has done writes to this buffer
1582  * @dev: drm device
1583  * @data: ioctl data blob
1584  * @file: drm file
1585  */
1586 int
1587 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1588                          struct drm_file *file)
1589 {
1590         struct drm_i915_gem_sw_finish *args = data;
1591         struct drm_i915_gem_object *obj;
1592
1593         obj = i915_gem_object_lookup(file, args->handle);
1594         if (!obj)
1595                 return -ENOENT;
1596
1597         /*
1598          * Proxy objects are barred from CPU access, so there is no
1599          * need to ban sw_finish as it is a nop.
1600          */
1601
1602         /* Pinned buffers may be scanout, so flush the cache */
1603         i915_gem_object_flush_if_display(obj);
1604         i915_gem_object_put(obj);
1605
1606         return 0;
1607 }
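
/*
 * Illustrative userspace sketch (an assumption, not part of this file): once
 * the CPU writes from the set_domain example above are done, userspace tells
 * the kernel via sw_finish so that a pinned scanout buffer gets flushed.
 *
 *	struct drm_i915_gem_sw_finish sf = { .handle = handle };
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &sf);
 */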
1608
1609 static inline bool
1610 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1611               unsigned long addr, unsigned long size)
1612 {
1613         if (vma->vm_file != filp)
1614                 return false;
1615
1616         return vma->vm_start == addr &&
1617                (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1618 }
1619
1620 /**
1621  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1622  *                       it is mapped to.
1623  * @dev: drm device
1624  * @data: ioctl data blob
1625  * @file: drm file
1626  *
1627  * While the mapping holds a reference on the contents of the object, it doesn't
1628  * imply a ref on the object itself.
1629  *
1630  * IMPORTANT:
1631  *
1632  * DRM driver writers who look at this function as an example of how to do GEM
1633  * mmap support: please don't implement mmap support like this. The modern way
1634  * to implement DRM mmap support is with an mmap offset ioctl (like
1635  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1636  * That way debug tooling like valgrind will understand what's going on; hiding
1637  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1638  * does cpu mmaps this way because we didn't know better.
1639  */
1640 int
1641 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1642                     struct drm_file *file)
1643 {
1644         struct drm_i915_gem_mmap *args = data;
1645         struct drm_i915_gem_object *obj;
1646         unsigned long addr;
1647
1648         if (args->flags & ~(I915_MMAP_WC))
1649                 return -EINVAL;
1650
1651         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1652                 return -ENODEV;
1653
1654         obj = i915_gem_object_lookup(file, args->handle);
1655         if (!obj)
1656                 return -ENOENT;
1657
1658         /* prime objects have no backing filp to GEM mmap
1659          * pages from.
1660          */
1661         if (!obj->base.filp) {
1662                 addr = -ENXIO;
1663                 goto err;
1664         }
1665
1666         if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1667                 addr = -EINVAL;
1668                 goto err;
1669         }
1670
1671         addr = vm_mmap(obj->base.filp, 0, args->size,
1672                        PROT_READ | PROT_WRITE, MAP_SHARED,
1673                        args->offset);
1674         if (IS_ERR_VALUE(addr))
1675                 goto err;
1676
1677         if (args->flags & I915_MMAP_WC) {
1678                 struct mm_struct *mm = current->mm;
1679                 struct vm_area_struct *vma;
1680
1681                 if (down_write_killable(&mm->mmap_sem)) {
1682                         addr = -EINTR;
1683                         goto err;
1684                 }
1685                 vma = find_vma(mm, addr);
1686                 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1687                         vma->vm_page_prot =
1688                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1689                 else
1690                         addr = -ENOMEM;
1691                 up_write(&mm->mmap_sem);
1692                 if (IS_ERR_VALUE(addr))
1693                         goto err;
1694
1695                 /* This may race, but that's ok, it only gets set */
1696                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1697         }
1698         i915_gem_object_put(obj);
1699
1700         args->addr_ptr = (u64)addr;
1701         return 0;
1702
1703 err:
1704         i915_gem_object_put(obj);
1705         return addr;
1706 }
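
/*
 * Illustrative userspace sketch (an assumption, not part of this file): the
 * legacy CPU mmap path implemented above, optionally asking for a WC mapping.
 * Assumes <stdint.h>, libdrm headers, an open DRM fd, a valid GEM handle and
 * its size; error handling elided.
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = size,
 *		.flags = use_wc ? I915_MMAP_WC : 0,
 *	};
 *	void *ptr = NULL;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0)
 *		ptr = (void *)(uintptr_t)arg.addr_ptr;
 */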
1707
1708 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1709 {
1710         return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1711 }
1712
1713 /**
1714  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1715  *
1716  * A history of the GTT mmap interface:
1717  *
1718  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
1719  *     aligned and suitable for fencing, and still fit into the available
1720  *     mappable space left by the pinned display objects. A classic problem
1721  *     we called the page-fault-of-doom where we would ping-pong between
1722  *     two objects that could not fit inside the GTT and so the memcpy
1723  *     would page one object in at the expense of the other between every
1724  *     single byte.
1725  *
1726  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1727  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1728  *     object is too large for the available space (or simply too large
1729  *     for the mappable aperture!), a view is created instead and faulted
1730  *     into userspace. (This view is aligned and sized appropriately for
1731  *     fenced access.)
1732  *
1733  * 2 - Recognise WC as a separate cache domain so that we can flush the
1734  *     delayed writes via GTT before performing direct access via WC.
1735  *
1736  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
1737  *     pagefault; swapin remains transparent.
1738  *
1739  * Restrictions:
1740  *
1741  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1742  *    hangs on some architectures, corruption on others. An attempt to service
1743  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1744  *
1745  *  * the object must be able to fit into RAM (physical memory, though not
1746  *    limited to the mappable aperture).
1747  *
1748  *
1749  * Caveats:
1750  *
1751  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1752  *    all data to system memory. Subsequent access will not be synchronized.
1753  *
1754  *  * all mappings are revoked on runtime device suspend.
1755  *
1756  *  * there are only 8, 16 or 32 fence registers to share between all users
1757  *    (older machines require a fence register for display and blitter access
1758  *    as well). Contention of the fence registers will cause the previous users
1759  *    to be unmapped and any new access will generate new page faults.
1760  *
1761  *  * running out of memory while servicing a fault may generate a SIGBUS,
1762  *    rather than the expected SIGSEGV.
1763  */
1764 int i915_gem_mmap_gtt_version(void)
1765 {
1766         return 3;
1767 }
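
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * querying the feature level documented above through GETPARAM before
 * relying on a particular behaviour, e.g. the version 3 removal of the
 * implicit set-domain(GTT) on the first fault.
 *
 *	int version = 0;
 *	int no_implicit_set_domain = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &version,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
 *		no_implicit_set_domain = (version >= 3);
 */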
1768
1769 static inline struct i915_ggtt_view
1770 compute_partial_view(const struct drm_i915_gem_object *obj,
1771                      pgoff_t page_offset,
1772                      unsigned int chunk)
1773 {
1774         struct i915_ggtt_view view;
1775
1776         if (i915_gem_object_is_tiled(obj))
1777                 chunk = roundup(chunk, tile_row_pages(obj));
1778
1779         view.type = I915_GGTT_VIEW_PARTIAL;
1780         view.partial.offset = rounddown(page_offset, chunk);
1781         view.partial.size =
1782                 min_t(unsigned int, chunk,
1783                       (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1784
1785         /* If the partial covers the entire object, just create a normal VMA. */
1786         if (chunk >= obj->base.size >> PAGE_SHIFT)
1787                 view.type = I915_GGTT_VIEW_NORMAL;
1788
1789         return view;
1790 }
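
/*
 * Worked example (illustrative numbers, assuming 4KiB pages): an untiled
 * 64MiB object has 16384 pages. A fault at page_offset 5000 with
 * chunk = MIN_CHUNK_PAGES (256) gives partial.offset = rounddown(5000, 256)
 * = 4864 and partial.size = min(256, 16384 - 4864) = 256; since 256 < 16384
 * the view stays I915_GGTT_VIEW_PARTIAL.
 */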
1791
1792 /**
1793  * i915_gem_fault - fault a page into the GTT
1794  * @vmf: fault info
1795  *
1796  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1797  * from userspace.  The fault handler takes care of binding the object to
1798  * the GTT (if needed), allocating and programming a fence register (again,
1799  * only if needed based on whether the old reg is still valid or the object
1800  * is tiled) and inserting a new PTE into the faulting process.
1801  *
1802  * Note that the faulting process may involve evicting existing objects
1803  * from the GTT and/or fence registers to make room.  So performance may
1804  * suffer if the GTT working set is large or there are few fence registers
1805  * left.
1806  *
1807  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1808  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1809  */
1810 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1811 {
1812 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1813         struct vm_area_struct *area = vmf->vma;
1814         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1815         struct drm_device *dev = obj->base.dev;
1816         struct drm_i915_private *dev_priv = to_i915(dev);
1817         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1818         bool write = area->vm_flags & VM_WRITE;
1819         intel_wakeref_t wakeref;
1820         struct i915_vma *vma;
1821         pgoff_t page_offset;
1822         int srcu;
1823         int ret;
1824
1825         /* Sanity check that we allow writing into this object */
1826         if (i915_gem_object_is_readonly(obj) && write)
1827                 return VM_FAULT_SIGBUS;
1828
1829         /* We don't use vmf->pgoff since that has the fake offset */
1830         page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1831
1832         trace_i915_gem_object_fault(obj, page_offset, true, write);
1833
1834         ret = i915_gem_object_pin_pages(obj);
1835         if (ret)
1836                 goto err;
1837
1838         wakeref = intel_runtime_pm_get(dev_priv);
1839
1840         srcu = i915_reset_trylock(dev_priv);
1841         if (srcu < 0) {
1842                 ret = srcu;
1843                 goto err_rpm;
1844         }
1845
1846         ret = i915_mutex_lock_interruptible(dev);
1847         if (ret)
1848                 goto err_reset;
1849
1850         /* Access to snoopable pages through the GTT is incoherent. */
1851         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1852                 ret = -EFAULT;
1853                 goto err_unlock;
1854         }
1855
1856         /* Now pin it into the GTT as needed */
1857         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1858                                        PIN_MAPPABLE |
1859                                        PIN_NONBLOCK |
1860                                        PIN_NONFAULT);
1861         if (IS_ERR(vma)) {
1862                 /* Use a partial view if the object is bigger than the available space */
1863                 struct i915_ggtt_view view =
1864                         compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1865                 unsigned int flags;
1866
1867                 flags = PIN_MAPPABLE;
1868                 if (view.type == I915_GGTT_VIEW_NORMAL)
1869                         flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1870
1871                 /*
1872                  * Userspace is now writing through an untracked VMA, abandon
1873                  * all hope that the hardware is able to track future writes.
1874                  */
1875                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1876
1877                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1878                 if (IS_ERR(vma) && !view.type) {
1879                         flags = PIN_MAPPABLE;
1880                         view.type = I915_GGTT_VIEW_PARTIAL;
1881                         vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1882                 }
1883         }
1884         if (IS_ERR(vma)) {
1885                 ret = PTR_ERR(vma);
1886                 goto err_unlock;
1887         }
1888
1889         ret = i915_vma_pin_fence(vma);
1890         if (ret)
1891                 goto err_unpin;
1892
1893         /* Finally, remap it using the new GTT offset */
1894         ret = remap_io_mapping(area,
1895                                area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1896                                (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1897                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1898                                &ggtt->iomap);
1899         if (ret)
1900                 goto err_fence;
1901
1902         /* Mark as being mmapped into userspace for later revocation */
1903         assert_rpm_wakelock_held(dev_priv);
1904         if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1905                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1906         GEM_BUG_ON(!obj->userfault_count);
1907
1908         i915_vma_set_ggtt_write(vma);
1909
1910 err_fence:
1911         i915_vma_unpin_fence(vma);
1912 err_unpin:
1913         __i915_vma_unpin(vma);
1914 err_unlock:
1915         mutex_unlock(&dev->struct_mutex);
1916 err_reset:
1917         i915_reset_unlock(dev_priv, srcu);
1918 err_rpm:
1919         intel_runtime_pm_put(dev_priv, wakeref);
1920         i915_gem_object_unpin_pages(obj);
1921 err:
1922         switch (ret) {
1923         case -EIO:
1924                 /*
1925                  * We eat errors when the gpu is terminally wedged to avoid
1926                  * userspace unduly crashing (gl has no provisions for mmaps to
1927                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1928                  * and so needs to be reported.
1929                  */
1930                 if (!i915_terminally_wedged(dev_priv))
1931                         return VM_FAULT_SIGBUS;
1932                 /* else: fall through */
1933         case -EAGAIN:
1934                 /*
1935                  * EAGAIN means the gpu is hung and we'll wait for the error
1936                  * handler to reset everything when re-faulting in
1937                  * i915_mutex_lock_interruptible.
1938                  */
1939         case 0:
1940         case -ERESTARTSYS:
1941         case -EINTR:
1942         case -EBUSY:
1943                 /*
1944                  * EBUSY is ok: this just means that another thread
1945                  * already did the job.
1946                  */
1947                 return VM_FAULT_NOPAGE;
1948         case -ENOMEM:
1949                 return VM_FAULT_OOM;
1950         case -ENOSPC:
1951         case -EFAULT:
1952                 return VM_FAULT_SIGBUS;
1953         default:
1954                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1955                 return VM_FAULT_SIGBUS;
1956         }
1957 }
1958
1959 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1960 {
1961         struct i915_vma *vma;
1962
1963         GEM_BUG_ON(!obj->userfault_count);
1964
1965         obj->userfault_count = 0;
1966         list_del(&obj->userfault_link);
1967         drm_vma_node_unmap(&obj->base.vma_node,
1968                            obj->base.dev->anon_inode->i_mapping);
1969
1970         for_each_ggtt_vma(vma, obj)
1971                 i915_vma_unset_userfault(vma);
1972 }
1973
1974 /**
1975  * i915_gem_release_mmap - remove physical page mappings
1976  * @obj: obj in question
1977  *
1978  * Preserve the reservation of the mmapping with the DRM core code, but
1979  * relinquish ownership of the pages back to the system.
1980  *
1981  * It is vital that we remove the page mapping if we have mapped a tiled
1982  * object through the GTT and then lose the fence register due to
1983  * resource pressure. Similarly if the object has been moved out of the
1984  * aperture, then pages mapped into userspace must be revoked. Removing the
1985  * mapping will then trigger a page fault on the next user access, allowing
1986  * fixup by i915_gem_fault().
1987  */
1988 void
1989 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1990 {
1991         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1992         intel_wakeref_t wakeref;
1993
1994         /* Serialisation between user GTT access and our code depends upon
1995          * revoking the CPU's PTE whilst the mutex is held. The next user
1996          * pagefault then has to wait until we release the mutex.
1997          *
1998          * Note that RPM complicates somewhat by adding an additional
1999          * requirement that operations to the GGTT be made holding the RPM
2000          * wakeref.
2001          */
2002         lockdep_assert_held(&i915->drm.struct_mutex);
2003         wakeref = intel_runtime_pm_get(i915);
2004
2005         if (!obj->userfault_count)
2006                 goto out;
2007
2008         __i915_gem_object_release_mmap(obj);
2009
2010         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
2011          * memory transactions from userspace before we return. The TLB
2012          * flushing implied by changing the PTEs above *should* be
2013          * sufficient; an extra barrier here just provides us with a bit
2014          * of paranoid documentation about our requirement to serialise
2015          * memory writes before touching registers / GSM.
2016          */
2017         wmb();
2018
2019 out:
2020         intel_runtime_pm_put(i915, wakeref);
2021 }
2022
2023 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
2024 {
2025         struct drm_i915_gem_object *obj, *on;
2026         int i;
2027
2028         /*
2029          * Only called during RPM suspend. All users of the userfault_list
2030          * must be holding an RPM wakeref to ensure that this can not
2031          * run concurrently with themselves (and use the struct_mutex for
2032          * protection between themselves).
2033          */
2034
2035         list_for_each_entry_safe(obj, on,
2036                                  &dev_priv->mm.userfault_list, userfault_link)
2037                 __i915_gem_object_release_mmap(obj);
2038
2039         /* The fences will be lost when the device powers down. If any were
2040          * in use by hardware (i.e. they are pinned), we should not be powering
2041          * down! All other fences will be reacquired by the user upon waking.
2042          */
2043         for (i = 0; i < dev_priv->num_fence_regs; i++) {
2044                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2045
2046                 /* Ideally we want to assert that the fence register is not
2047                  * live at this point (i.e. that no piece of code will be
2048                  * trying to write through fence + GTT, as that both violates
2049                  * our tracking of activity and associated locking/barriers
2050                  * and is illegal given that the hw is powered down).
2051                  *
2052                  * Previously we used reg->pin_count as a "liveness" indicator.
2053                  * That is not sufficient, and we need a more fine-grained
2054                  * tool if we want to have a sanity check here.
2055                  */
2056
2057                 if (!reg->vma)
2058                         continue;
2059
2060                 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
2061                 reg->dirty = true;
2062         }
2063 }
2064
2065 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2066 {
2067         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2068         int err;
2069
2070         err = drm_gem_create_mmap_offset(&obj->base);
2071         if (likely(!err))
2072                 return 0;
2073
2074         /* Attempt to reap some mmap space from dead objects */
2075         do {
2076                 err = i915_gem_wait_for_idle(dev_priv,
2077                                              I915_WAIT_INTERRUPTIBLE,
2078                                              MAX_SCHEDULE_TIMEOUT);
2079                 if (err)
2080                         break;
2081
2082                 i915_gem_drain_freed_objects(dev_priv);
2083                 err = drm_gem_create_mmap_offset(&obj->base);
2084                 if (!err)
2085                         break;
2086
2087         } while (flush_delayed_work(&dev_priv->gt.retire_work));
2088
2089         return err;
2090 }
2091
2092 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2093 {
2094         drm_gem_free_mmap_offset(&obj->base);
2095 }
2096
2097 int
2098 i915_gem_mmap_gtt(struct drm_file *file,
2099                   struct drm_device *dev,
2100                   u32 handle,
2101                   u64 *offset)
2102 {
2103         struct drm_i915_gem_object *obj;
2104         int ret;
2105
2106         obj = i915_gem_object_lookup(file, handle);
2107         if (!obj)
2108                 return -ENOENT;
2109
2110         ret = i915_gem_object_create_mmap_offset(obj);
2111         if (ret == 0)
2112                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2113
2114         i915_gem_object_put(obj);
2115         return ret;
2116 }
2117
2118 /**
2119  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2120  * @dev: DRM device
2121  * @data: GTT mapping ioctl data
2122  * @file: GEM object info
2123  *
2124  * Simply returns the fake offset to userspace so it can mmap it.
2125  * The mmap call will end up in drm_gem_mmap(), which will set things
2126  * up so we can get faults in the handler above.
2127  *
2128  * The fault handler will take care of binding the object into the GTT
2129  * (since it may have been evicted to make room for something), allocating
2130  * a fence register, and mapping the appropriate aperture address into
2131  * userspace.
2132  */
2133 int
2134 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2135                         struct drm_file *file)
2136 {
2137         struct drm_i915_gem_mmap_gtt *args = data;
2138
2139         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2140 }
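
/*
 * Illustrative userspace sketch (an assumption, not part of this file): the
 * recommended flow described above - fetch the fake offset, then mmap the
 * DRM fd itself. Assumes <sys/mman.h>, libdrm headers, an open DRM fd, a
 * valid GEM handle and its size; error handling elided.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr = MAP_FAILED;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, arg.offset);
 */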
2141
2142 /* Immediately discard the backing storage */
2143 static void
2144 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2145 {
2146         i915_gem_object_free_mmap_offset(obj);
2147
2148         if (obj->base.filp == NULL)
2149                 return;
2150
2151         /* Our goal here is to return as much of the memory as
2152          * possible back to the system, as we are called from OOM.
2153          * To do this we must instruct the shmfs to drop all of its
2154          * backing pages, *now*.
2155          */
2156         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2157         obj->mm.madv = __I915_MADV_PURGED;
2158         obj->mm.pages = ERR_PTR(-EFAULT);
2159 }
2160
2161 /* Try to discard unwanted pages */
2162 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2163 {
2164         struct address_space *mapping;
2165
2166         lockdep_assert_held(&obj->mm.lock);
2167         GEM_BUG_ON(i915_gem_object_has_pages(obj));
2168
2169         switch (obj->mm.madv) {
2170         case I915_MADV_DONTNEED:
2171                 i915_gem_object_truncate(obj);
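                /* fall through */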
2172         case __I915_MADV_PURGED:
2173                 return;
2174         }
2175
2176         if (obj->base.filp == NULL)
2177                 return;
2178
2179         mapping = obj->base.filp->f_mapping;
2180         invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2181 }
2182
2183 /*
2184  * Move pages to the appropriate lru and release the pagevec, decrementing the
2185  * ref count of those pages.
2186  */
2187 static void check_release_pagevec(struct pagevec *pvec)
2188 {
2189         check_move_unevictable_pages(pvec);
2190         __pagevec_release(pvec);
2191         cond_resched();
2192 }
2193
2194 static void
2195 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2196                               struct sg_table *pages)
2197 {
2198         struct sgt_iter sgt_iter;
2199         struct pagevec pvec;
2200         struct page *page;
2201
2202         __i915_gem_object_release_shmem(obj, pages, true);
2203
2204         i915_gem_gtt_finish_pages(obj, pages);
2205
2206         if (i915_gem_object_needs_bit17_swizzle(obj))
2207                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2208
2209         mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2210
2211         pagevec_init(&pvec);
2212         for_each_sgt_page(page, sgt_iter, pages) {
2213                 if (obj->mm.dirty)
2214                         set_page_dirty(page);
2215
2216                 if (obj->mm.madv == I915_MADV_WILLNEED)
2217                         mark_page_accessed(page);
2218
2219                 if (!pagevec_add(&pvec, page))
2220                         check_release_pagevec(&pvec);
2221         }
2222         if (pagevec_count(&pvec))
2223                 check_release_pagevec(&pvec);
2224         obj->mm.dirty = false;
2225
2226         sg_free_table(pages);
2227         kfree(pages);
2228 }
2229
2230 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2231 {
2232         struct radix_tree_iter iter;
2233         void __rcu **slot;
2234
2235         rcu_read_lock();
2236         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2237                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2238         rcu_read_unlock();
2239 }
2240
2241 static struct sg_table *
2242 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2243 {
2244         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2245         struct sg_table *pages;
2246
2247         pages = fetch_and_zero(&obj->mm.pages);
2248         if (IS_ERR_OR_NULL(pages))
2249                 return pages;
2250
2251         spin_lock(&i915->mm.obj_lock);
2252         list_del(&obj->mm.link);
2253         spin_unlock(&i915->mm.obj_lock);
2254
2255         if (obj->mm.mapping) {
2256                 void *ptr;
2257
2258                 ptr = page_mask_bits(obj->mm.mapping);
2259                 if (is_vmalloc_addr(ptr))
2260                         vunmap(ptr);
2261                 else
2262                         kunmap(kmap_to_page(ptr));
2263
2264                 obj->mm.mapping = NULL;
2265         }
2266
2267         __i915_gem_object_reset_page_iter(obj);
2268         obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2269
2270         return pages;
2271 }
2272
2273 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2274                                 enum i915_mm_subclass subclass)
2275 {
2276         struct sg_table *pages;
2277         int ret;
2278
2279         if (i915_gem_object_has_pinned_pages(obj))
2280                 return -EBUSY;
2281
2282         GEM_BUG_ON(obj->bind_count);
2283
2284         /* May be called by shrinker from within get_pages() (on another bo) */
2285         mutex_lock_nested(&obj->mm.lock, subclass);
2286         if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2287                 ret = -EBUSY;
2288                 goto unlock;
2289         }
2290
2291         /*
2292          * ->put_pages might need to allocate memory for the bit17 swizzle
2293          * array, hence protect them from being reaped by removing them from gtt
2294          * lists early.
2295          */
2296         pages = __i915_gem_object_unset_pages(obj);
2297
2298         /*
2299          * XXX Temporary hijinx to avoid updating all backends to handle
2300          * NULL pages. In the future, when we have more asynchronous
2301          * get_pages backends we should be better able to handle the
2302          * cancellation of the async task in a more uniform manner.
2303          */
2304         if (!pages && !i915_gem_object_needs_async_cancel(obj))
2305                 pages = ERR_PTR(-EINVAL);
2306
2307         if (!IS_ERR(pages))
2308                 obj->ops->put_pages(obj, pages);
2309
2310         ret = 0;
2311 unlock:
2312         mutex_unlock(&obj->mm.lock);
2313
2314         return ret;
2315 }
2316
2317 bool i915_sg_trim(struct sg_table *orig_st)
2318 {
2319         struct sg_table new_st;
2320         struct scatterlist *sg, *new_sg;
2321         unsigned int i;
2322
2323         if (orig_st->nents == orig_st->orig_nents)
2324                 return false;
2325
2326         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2327                 return false;
2328
2329         new_sg = new_st.sgl;
2330         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2331                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2332                 sg_dma_address(new_sg) = sg_dma_address(sg);
2333                 sg_dma_len(new_sg) = sg_dma_len(sg);
2334
2335                 new_sg = sg_next(new_sg);
2336         }
2337         GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2338
2339         sg_free_table(orig_st);
2340
2341         *orig_st = new_st;
2342         return true;
2343 }
2344
2345 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2346 {
2347         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2348         const unsigned long page_count = obj->base.size / PAGE_SIZE;
2349         unsigned long i;
2350         struct address_space *mapping;
2351         struct sg_table *st;
2352         struct scatterlist *sg;
2353         struct sgt_iter sgt_iter;
2354         struct page *page;
2355         unsigned long last_pfn = 0;     /* suppress gcc warning */
2356         unsigned int max_segment = i915_sg_segment_size();
2357         unsigned int sg_page_sizes;
2358         struct pagevec pvec;
2359         gfp_t noreclaim;
2360         int ret;
2361
2362         /*
2363          * Assert that the object is not currently in any GPU domain. As it
2364          * wasn't in the GTT, there shouldn't be any way it could have been in
2365          * a GPU cache
2366          */
2367         GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2368         GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2369
2370         /*
2371          * If there's no chance of allocating enough pages for the whole
2372          * object, bail early.
2373          */
2374         if (page_count > totalram_pages())
2375                 return -ENOMEM;
2376
2377         st = kmalloc(sizeof(*st), GFP_KERNEL);
2378         if (st == NULL)
2379                 return -ENOMEM;
2380
2381 rebuild_st:
2382         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2383                 kfree(st);
2384                 return -ENOMEM;
2385         }
2386
2387         /*
2388          * Get the list of pages out of our struct file.  They'll be pinned
2389          * at this point until we release them.
2390          *
2391          * Fail silently without starting the shrinker
2392          */
2393         mapping = obj->base.filp->f_mapping;
2394         mapping_set_unevictable(mapping);
2395         noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2396         noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2397
2398         sg = st->sgl;
2399         st->nents = 0;
2400         sg_page_sizes = 0;
2401         for (i = 0; i < page_count; i++) {
2402                 const unsigned int shrink[] = {
2403                         I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2404                         0,
2405                 }, *s = shrink;
2406                 gfp_t gfp = noreclaim;
2407
2408                 do {
2409                         cond_resched();
2410                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2411                         if (!IS_ERR(page))
2412                                 break;
2413
2414                         if (!*s) {
2415                                 ret = PTR_ERR(page);
2416                                 goto err_sg;
2417                         }
2418
2419                         i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2420
2421                         /*
2422                          * We've tried hard to allocate the memory by reaping
2423                          * our own buffers; now let the real VM do its job and
2424                          * go down in flames if truly OOM.
2425                          *
2426                          * However, since graphics tend to be disposable,
2427                          * defer the oom here by reporting the ENOMEM back
2428                          * to userspace.
2429                          */
2430                         if (!*s) {
2431                                 /* reclaim and warn, but no oom */
2432                                 gfp = mapping_gfp_mask(mapping);
2433
2434                                 /*
2435                                  * Our bo are always dirty and so we require
2436                                  * kswapd to reclaim our pages (direct reclaim
2437                                  * does not effectively begin pageout of our
2438                                  * buffers on its own). However, direct reclaim
2439                                  * only waits for kswapd when under allocation
2440                                  * congestion. So as a result __GFP_RECLAIM is
2441                                  * unreliable and fails to actually reclaim our
2442                                  * dirty pages -- unless you try over and over
2443                                  * again with !__GFP_NORETRY. However, we still
2444                                  * want to fail this allocation rather than
2445                                  * trigger the out-of-memory killer and for
2446                                  * this we want __GFP_RETRY_MAYFAIL.
2447                                  */
2448                                 gfp |= __GFP_RETRY_MAYFAIL;
2449                         }
2450                 } while (1);
2451
2452                 if (!i ||
2453                     sg->length >= max_segment ||
2454                     page_to_pfn(page) != last_pfn + 1) {
2455                         if (i) {
2456                                 sg_page_sizes |= sg->length;
2457                                 sg = sg_next(sg);
2458                         }
2459                         st->nents++;
2460                         sg_set_page(sg, page, PAGE_SIZE, 0);
2461                 } else {
2462                         sg->length += PAGE_SIZE;
2463                 }
2464                 last_pfn = page_to_pfn(page);
2465
2466                 /* Check that the i965g/gm workaround works. */
2467                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2468         }
2469         if (sg) { /* loop terminated early; short sg table */
2470                 sg_page_sizes |= sg->length;
2471                 sg_mark_end(sg);
2472         }
2473
2474         /* Trim unused sg entries to avoid wasting memory. */
2475         i915_sg_trim(st);
2476
2477         ret = i915_gem_gtt_prepare_pages(obj, st);
2478         if (ret) {
2479                 /*
2480                  * DMA remapping failed? One possible cause is that
2481                  * it could not reserve enough large entries; asking
2482                  * for PAGE_SIZE chunks instead may be helpful.
2483                  */
2484                 if (max_segment > PAGE_SIZE) {
2485                         for_each_sgt_page(page, sgt_iter, st)
2486                                 put_page(page);
2487                         sg_free_table(st);
2488
2489                         max_segment = PAGE_SIZE;
2490                         goto rebuild_st;
2491                 } else {
2492                         dev_warn(&dev_priv->drm.pdev->dev,
2493                                  "Failed to DMA remap %lu pages\n",
2494                                  page_count);
2495                         goto err_pages;
2496                 }
2497         }
2498
2499         if (i915_gem_object_needs_bit17_swizzle(obj))
2500                 i915_gem_object_do_bit_17_swizzle(obj, st);
2501
2502         __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2503
2504         return 0;
2505
2506 err_sg:
2507         sg_mark_end(sg);
2508 err_pages:
2509         mapping_clear_unevictable(mapping);
2510         pagevec_init(&pvec);
2511         for_each_sgt_page(page, sgt_iter, st) {
2512                 if (!pagevec_add(&pvec, page))
2513                         check_release_pagevec(&pvec);
2514         }
2515         if (pagevec_count(&pvec))
2516                 check_release_pagevec(&pvec);
2517         sg_free_table(st);
2518         kfree(st);
2519
2520         /*
2521          * shmemfs first checks if there is enough memory to allocate the page
2522          * and reports ENOSPC should there be insufficient, along with the usual
2523          * ENOMEM for a genuine allocation failure.
2524          *
2525          * We use ENOSPC in our driver to mean that we have run out of aperture
2526          * space and so want to translate the error from shmemfs back to our
2527          * usual understanding of ENOMEM.
2528          */
2529         if (ret == -ENOSPC)
2530                 ret = -ENOMEM;
2531
2532         return ret;
2533 }
2534
2535 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2536                                  struct sg_table *pages,
2537                                  unsigned int sg_page_sizes)
2538 {
2539         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2540         unsigned long supported = INTEL_INFO(i915)->page_sizes;
2541         int i;
2542
2543         lockdep_assert_held(&obj->mm.lock);
2544
2545         /* Make the pages coherent with the GPU (flushing any swapin). */
2546         if (obj->cache_dirty) {
2547                 obj->write_domain = 0;
2548                 if (i915_gem_object_has_struct_page(obj))
2549                         drm_clflush_sg(pages);
2550                 obj->cache_dirty = false;
2551         }
2552
2553         obj->mm.get_page.sg_pos = pages->sgl;
2554         obj->mm.get_page.sg_idx = 0;
2555
2556         obj->mm.pages = pages;
2557
2558         if (i915_gem_object_is_tiled(obj) &&
2559             i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2560                 GEM_BUG_ON(obj->mm.quirked);
2561                 __i915_gem_object_pin_pages(obj);
2562                 obj->mm.quirked = true;
2563         }
2564
2565         GEM_BUG_ON(!sg_page_sizes);
2566         obj->mm.page_sizes.phys = sg_page_sizes;
2567
2568         /*
2569          * Calculate the supported page-sizes which fit into the given
2570          * sg_page_sizes. This will give us the page-sizes which we may be able
2571          * to use opportunistically when later inserting into the GTT. For
2572          * example if phys=2G, then in theory we should be able to use 1G, 2M,
2573          * 64K or 4K pages, although in practice this will depend on a number of
2574          * other factors.
2575          */
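        /*
         * For instance (illustrative numbers): with supported = 4K | 64K | 2M
         * and phys = 2M | 4K, every supported bit has some phys bit at or
         * above it, so sg becomes 4K | 64K | 2M; with phys = 64K | 4K only
         * the 4K and 64K bits end up set in sg.
         */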
2576         obj->mm.page_sizes.sg = 0;
2577         for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2578                 if (obj->mm.page_sizes.phys & ~0u << i)
2579                         obj->mm.page_sizes.sg |= BIT(i);
2580         }
2581         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2582
2583         spin_lock(&i915->mm.obj_lock);
2584         list_add(&obj->mm.link, &i915->mm.unbound_list);
2585         spin_unlock(&i915->mm.obj_lock);
2586 }
2587
2588 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2589 {
2590         int err;
2591
2592         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2593                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2594                 return -EFAULT;
2595         }
2596
2597         err = obj->ops->get_pages(obj);
2598         GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2599
2600         return err;
2601 }
2602
2603 /* Ensure that the associated pages are gathered from the backing storage
2604  * and pinned into our object. i915_gem_object_pin_pages() may be called
2605  * multiple times before they are released by a single call to
2606  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2607  * either as a result of memory pressure (reaping pages under the shrinker)
2608  * or as the object is itself released.
2609  */
2610 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2611 {
2612         int err;
2613
2614         err = mutex_lock_interruptible(&obj->mm.lock);
2615         if (err)
2616                 return err;
2617
2618         if (unlikely(!i915_gem_object_has_pages(obj))) {
2619                 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2620
2621                 err = ____i915_gem_object_get_pages(obj);
2622                 if (err)
2623                         goto unlock;
2624
2625                 smp_mb__before_atomic();
2626         }
2627         atomic_inc(&obj->mm.pages_pin_count);
2628
2629 unlock:
2630         mutex_unlock(&obj->mm.lock);
2631         return err;
2632 }
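
/*
 * Illustrative in-kernel usage sketch (an assumption, not taken from this
 * file): callers pair every pin with an unpin once they have finished
 * touching the backing store.
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	... access the pages, e.g. via obj->mm.pages ...
 *
 *	i915_gem_object_unpin_pages(obj);
 */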
2633
2634 /* The 'mapping' part of i915_gem_object_pin_map() below */
2635 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2636                                  enum i915_map_type type)
2637 {
2638         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2639         struct sg_table *sgt = obj->mm.pages;
2640         struct sgt_iter sgt_iter;
2641         struct page *page;
2642         struct page *stack_pages[32];
2643         struct page **pages = stack_pages;
2644         unsigned long i = 0;
2645         pgprot_t pgprot;
2646         void *addr;
2647
2648         /* A single page can always be kmapped */
2649         if (n_pages == 1 && type == I915_MAP_WB)
2650                 return kmap(sg_page(sgt->sgl));
2651
2652         if (n_pages > ARRAY_SIZE(stack_pages)) {
2653                 /* Too big for stack -- allocate temporary array instead */
2654                 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2655                 if (!pages)
2656                         return NULL;
2657         }
2658
2659         for_each_sgt_page(page, sgt_iter, sgt)
2660                 pages[i++] = page;
2661
2662         /* Check that we have the expected number of pages */
2663         GEM_BUG_ON(i != n_pages);
2664
2665         switch (type) {
2666         default:
2667                 MISSING_CASE(type);
2668                 /* fallthrough to use PAGE_KERNEL anyway */
2669         case I915_MAP_WB:
2670                 pgprot = PAGE_KERNEL;
2671                 break;
2672         case I915_MAP_WC:
2673                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2674                 break;
2675         }
2676         addr = vmap(pages, n_pages, 0, pgprot);
2677
2678         if (pages != stack_pages)
2679                 kvfree(pages);
2680
2681         return addr;
2682 }
2683
2684 /* get, pin, and map the pages of the object into kernel space */
2685 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2686                               enum i915_map_type type)
2687 {
2688         enum i915_map_type has_type;
2689         bool pinned;
2690         void *ptr;
2691         int ret;
2692
2693         if (unlikely(!i915_gem_object_has_struct_page(obj)))
2694                 return ERR_PTR(-ENXIO);
2695
2696         ret = mutex_lock_interruptible(&obj->mm.lock);
2697         if (ret)
2698                 return ERR_PTR(ret);
2699
2700         pinned = !(type & I915_MAP_OVERRIDE);
2701         type &= ~I915_MAP_OVERRIDE;
2702
2703         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2704                 if (unlikely(!i915_gem_object_has_pages(obj))) {
2705                         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2706
2707                         ret = ____i915_gem_object_get_pages(obj);
2708                         if (ret)
2709                                 goto err_unlock;
2710
2711                         smp_mb__before_atomic();
2712                 }
2713                 atomic_inc(&obj->mm.pages_pin_count);
2714                 pinned = false;
2715         }
2716         GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2717
2718         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2719         if (ptr && has_type != type) {
2720                 if (pinned) {
2721                         ret = -EBUSY;
2722                         goto err_unpin;
2723                 }
2724
2725                 if (is_vmalloc_addr(ptr))
2726                         vunmap(ptr);
2727                 else
2728                         kunmap(kmap_to_page(ptr));
2729
2730                 ptr = obj->mm.mapping = NULL;
2731         }
2732
2733         if (!ptr) {
2734                 ptr = i915_gem_object_map(obj, type);
2735                 if (!ptr) {
2736                         ret = -ENOMEM;
2737                         goto err_unpin;
2738                 }
2739
2740                 obj->mm.mapping = page_pack_bits(ptr, type);
2741         }
2742
2743 out_unlock:
2744         mutex_unlock(&obj->mm.lock);
2745         return ptr;
2746
2747 err_unpin:
2748         atomic_dec(&obj->mm.pages_pin_count);
2749 err_unlock:
2750         ptr = ERR_PTR(ret);
2751         goto out_unlock;
2752 }
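
/*
 * Illustrative in-kernel usage sketch (an assumption, not taken from this
 * file): map the whole object write-back, fill it, then drop the mapping pin.
 *
 *	void *vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *	memset(vaddr, 0, obj->base.size);
 *	i915_gem_object_unpin_map(obj);
 */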
2753
2754 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
2755                                  unsigned long offset,
2756                                  unsigned long size)
2757 {
2758         enum i915_map_type has_type;
2759         void *ptr;
2760
2761         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2762         GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2763                                      offset, size, obj->base.size));
2764
2765         obj->mm.dirty = true;
2766
2767         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
2768                 return;
2769
2770         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2771         if (has_type == I915_MAP_WC)
2772                 return;
2773
2774         drm_clflush_virt_range(ptr + offset, size);
2775         if (size == obj->base.size) {
2776                 obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
2777                 obj->cache_dirty = false;
2778         }
2779 }
2780
2781 static int
2782 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2783                            const struct drm_i915_gem_pwrite *arg)
2784 {
2785         struct address_space *mapping = obj->base.filp->f_mapping;
2786         char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2787         u64 remain, offset;
2788         unsigned int pg;
2789
2790         /* Before we instantiate/pin the backing store for our use, we
2791          * can prepopulate the shmemfs filp efficiently using a write into
2792          * the pagecache. We avoid the penalty of instantiating all the
2793          * pages, important if the user is just writing to a few and never
2794          * uses the object on the GPU, and using a direct write into shmemfs
2795          * allows it to avoid the cost of retrieving a page (either swapin
2796          * or clearing-before-use) before it is overwritten.
2797          */
2798         if (i915_gem_object_has_pages(obj))
2799                 return -ENODEV;
2800
2801         if (obj->mm.madv != I915_MADV_WILLNEED)
2802                 return -EFAULT;
2803
2804         /* Before the pages are instantiated the object is treated as being
2805          * in the CPU domain. The pages will be clflushed as required before
2806          * use, and we can freely write into the pages directly. If userspace
2807          * races pwrite with any other operation, corruption will ensue -
2808          * that is userspace's prerogative!
2809          */
2810
2811         remain = arg->size;
2812         offset = arg->offset;
2813         pg = offset_in_page(offset);
2814
2815         do {
2816                 unsigned int len, unwritten;
2817                 struct page *page;
2818                 void *data, *vaddr;
2819                 int err;
2820
2821                 len = PAGE_SIZE - pg;
2822                 if (len > remain)
2823                         len = remain;
2824
2825                 err = pagecache_write_begin(obj->base.filp, mapping,
2826                                             offset, len, 0,
2827                                             &page, &data);
2828                 if (err < 0)
2829                         return err;
2830
2831                 vaddr = kmap(page);
2832                 unwritten = copy_from_user(vaddr + pg, user_data, len);
2833                 kunmap(page);
2834
2835                 err = pagecache_write_end(obj->base.filp, mapping,
2836                                           offset, len, len - unwritten,
2837                                           page, data);
2838                 if (err < 0)
2839                         return err;
2840
2841                 if (unwritten)
2842                         return -EFAULT;
2843
2844                 remain -= len;
2845                 user_data += len;
2846                 offset += len;
2847                 pg = 0;
2848         } while (remain);
2849
2850         return 0;
2851 }
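
/*
 * Illustrative userspace sketch (an assumption, not part of this file): a
 * pwrite ioctl call that can take the pagecache fast path above when the
 * object has no pages instantiated yet. Assumes libdrm headers, an open DRM
 * fd, a valid GEM handle and a `data` buffer; error handling elided.
 *
 *	struct drm_i915_gem_pwrite pw = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = sizeof(data),
 *		.data_ptr = (uintptr_t)data,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pw);
 */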
2852
2853 static void
2854 i915_gem_retire_work_handler(struct work_struct *work)
2855 {
2856         struct drm_i915_private *dev_priv =
2857                 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2858         struct drm_device *dev = &dev_priv->drm;
2859
2860         /* Come back later if the device is busy... */
2861         if (mutex_trylock(&dev->struct_mutex)) {
2862                 i915_retire_requests(dev_priv);
2863                 mutex_unlock(&dev->struct_mutex);
2864         }
2865
2866         /*
2867          * Keep the retire handler running until we are finally idle.
2868          * We do not need to do this test under locking as in the worst-case
2869          * we queue the retire worker once too often.
2870          */
2871         if (READ_ONCE(dev_priv->gt.awake))
2872                 queue_delayed_work(dev_priv->wq,
2873                                    &dev_priv->gt.retire_work,
2874                                    round_jiffies_up_relative(HZ));
2875 }
2876
2877 static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
2878                                           unsigned long mask)
2879 {
2880         bool result = true;
2881
2882         /*
2883          * Even if we fail to switch, give whatever is running a small chance
2884          * to save itself before we report the failure. Yes, this may be a
2885          * false positive due to e.g. ENOMEM, caveat emptor!
2886          */
2887         if (i915_gem_switch_to_kernel_context(i915, mask))
2888                 result = false;
2889
2890         if (i915_gem_wait_for_idle(i915,
2891                                    I915_WAIT_LOCKED |
2892                                    I915_WAIT_FOR_IDLE_BOOST,
2893                                    I915_GEM_IDLE_TIMEOUT))
2894                 result = false;
2895
2896         if (!result) {
2897                 if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
2898                         dev_err(i915->drm.dev,
2899                                 "Failed to idle engines, declaring wedged!\n");
2900                         GEM_TRACE_DUMP();
2901                 }
2902
2903                 /* Forcibly cancel outstanding work and leave the gpu quiet. */
2904                 i915_gem_set_wedged(i915);
2905         }
2906
2907         i915_retire_requests(i915); /* ensure we flush after wedging */
2908         return result;
2909 }
2910
2911 static bool load_power_context(struct drm_i915_private *i915)
2912 {
2913         /* Force loading the kernel context on all engines */
2914         if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
2915                 return false;
2916
2917         /*
2918          * Immediately park the GPU so that we enable powersaving and
2919          * treat it as idle. The next time we issue a request, we will
2920          * unpark and start using the engine->pinned_default_state, otherwise
2921          * it is in limbo and an early reset may fail.
2922          */
2923         __i915_gem_park(i915);
2924
2925         return true;
2926 }
2927
2928 static void
2929 i915_gem_idle_work_handler(struct work_struct *work)
2930 {
2931         struct drm_i915_private *i915 =
2932                 container_of(work, typeof(*i915), gt.idle_work.work);
2933         bool rearm_hangcheck;
2934
2935         if (!READ_ONCE(i915->gt.awake))
2936                 return;
2937
2938         if (READ_ONCE(i915->gt.active_requests))
2939                 return;
2940
2941         rearm_hangcheck =
2942                 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
2943
2944         if (!mutex_trylock(&i915->drm.struct_mutex)) {
2945                 /* Currently busy, come back later */
2946                 mod_delayed_work(i915->wq,
2947                                  &i915->gt.idle_work,
2948                                  msecs_to_jiffies(50));
2949                 goto out_rearm;
2950         }
2951
2952         /*
2953          * Flush out the last user context, leaving only the pinned
2954          * kernel context resident. Should anything unfortunate happen
2955          * while we are idle (such as the GPU being power cycled), no users
2956          * will be harmed.
2957          */
2958         if (!work_pending(&i915->gt.idle_work.work) &&
2959             !i915->gt.active_requests) {
2960                 ++i915->gt.active_requests; /* don't requeue idle */
2961
2962                 switch_to_kernel_context_sync(i915, i915->gt.active_engines);
2963
2964                 if (!--i915->gt.active_requests) {
2965                         __i915_gem_park(i915);
2966                         rearm_hangcheck = false;
2967                 }
2968         }
2969
2970         mutex_unlock(&i915->drm.struct_mutex);
2971
2972 out_rearm:
2973         if (rearm_hangcheck) {
2974                 GEM_BUG_ON(!i915->gt.awake);
2975                 i915_queue_hangcheck(i915);
2976         }
2977 }
2978
2979 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2980 {
2981         struct drm_i915_private *i915 = to_i915(gem->dev);
2982         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2983         struct drm_i915_file_private *fpriv = file->driver_priv;
2984         struct i915_lut_handle *lut, *ln;
2985
2986         mutex_lock(&i915->drm.struct_mutex);
2987
2988         list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
2989                 struct i915_gem_context *ctx = lut->ctx;
2990                 struct i915_vma *vma;
2991
2992                 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
2993                 if (ctx->file_priv != fpriv)
2994                         continue;
2995
2996                 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
2997                 GEM_BUG_ON(vma->obj != obj);
2998
2999                 /* We allow the process to have multiple handles to the same
3000                  * vma, in the same fd namespace, by virtue of flink/open.
3001                  */
3002                 GEM_BUG_ON(!vma->open_count);
3003                 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
3004                         i915_vma_close(vma);
3005
3006                 list_del(&lut->obj_link);
3007                 list_del(&lut->ctx_link);
3008
3009                 i915_lut_handle_free(lut);
3010                 __i915_gem_object_release_unless_active(obj);
3011         }
3012
3013         mutex_unlock(&i915->drm.struct_mutex);
3014 }
3015
3016 static unsigned long to_wait_timeout(s64 timeout_ns)
3017 {
3018         if (timeout_ns < 0)
3019                 return MAX_SCHEDULE_TIMEOUT;
3020
3021         if (timeout_ns == 0)
3022                 return 0;
3023
3024         return nsecs_to_jiffies_timeout(timeout_ns);
3025 }
3026
3027 /**
3028  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3029  * @dev: drm device pointer
3030  * @data: ioctl data blob
3031  * @file: drm file pointer
3032  *
3033  * Returns 0 if successful, else an error is returned with the remaining time in
3034  * the timeout parameter.
3035  *  -ETIME: object is still busy after timeout
3036  *  -ERESTARTSYS: signal interrupted the wait
3037  *  -ENOENT: object doesn't exist
3038  * Also possible, but rare:
3039  *  -EAGAIN: incomplete, restart syscall
3040  *  -ENOMEM: damn
3041  *  -ENODEV: Internal IRQ fail
3042  *  -E?: The add request failed
3043  *
3044  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3045  * non-zero timeout parameter the wait ioctl will wait for the given number of
3046  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3047  * without holding struct_mutex, the object may become re-busied before this
3048  * function completes. A similar but shorter race condition exists in the
3049  * busy ioctl.
3050  */
3051 int
3052 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3053 {
3054         struct drm_i915_gem_wait *args = data;
3055         struct drm_i915_gem_object *obj;
3056         ktime_t start;
3057         long ret;
3058
3059         if (args->flags != 0)
3060                 return -EINVAL;
3061
3062         obj = i915_gem_object_lookup(file, args->bo_handle);
3063         if (!obj)
3064                 return -ENOENT;
3065
3066         start = ktime_get();
3067
3068         ret = i915_gem_object_wait(obj,
3069                                    I915_WAIT_INTERRUPTIBLE |
3070                                    I915_WAIT_PRIORITY |
3071                                    I915_WAIT_ALL,
3072                                    to_wait_timeout(args->timeout_ns));
3073
3074         if (args->timeout_ns > 0) {
3075                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3076                 if (args->timeout_ns < 0)
3077                         args->timeout_ns = 0;
3078
3079                 /*
3080                  * Apparently ktime isn't accurate enough and occasionally has a
3081                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3082                  * things up to make the test happy. We allow up to 1 jiffy.
3083                  *
3084                  * This is a regression from the timespec->ktime conversion.
3085                  */
3086                 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3087                         args->timeout_ns = 0;
3088
3089                 /* Asked to wait beyond the jiffie/scheduler precision? */
3090                 if (ret == -ETIME && args->timeout_ns)
3091                         ret = -EAGAIN;
3092         }
3093
3094         i915_gem_object_put(obj);
3095         return ret;
3096 }
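
/*
 * Editor's sketch (userspace side, not part of this file): one plausible
 * way to drive the wait ioctl implemented above. It assumes libdrm's
 * drmIoctl() and the i915 uapi header are available; "fd" is an open DRM
 * device and "handle" a GEM handle owned by the caller, and the hypothetical
 * wait_bo_example() helper exists only for illustration.
 */
#include <errno.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int wait_bo_example(int fd, __u32 handle, __s64 timeout_ns)
{
        struct drm_i915_gem_wait wait = {
                .bo_handle = handle,
                .flags = 0,                     /* must be zero, see above */
                .timeout_ns = timeout_ns,       /* <0 waits forever, 0 polls */
        };

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
                return -errno;                  /* e.g. ETIME if still busy */

        /* On success, wait.timeout_ns holds the (clamped) remaining time. */
        return 0;
}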
3097
3098 static int wait_for_engines(struct drm_i915_private *i915)
3099 {
3100         if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
3101                 dev_err(i915->drm.dev,
3102                         "Failed to idle engines, declaring wedged!\n");
3103                 GEM_TRACE_DUMP();
3104                 i915_gem_set_wedged(i915);
3105                 return -EIO;
3106         }
3107
3108         return 0;
3109 }
3110
3111 static long
3112 wait_for_timelines(struct drm_i915_private *i915,
3113                    unsigned int flags, long timeout)
3114 {
3115         struct i915_gt_timelines *gt = &i915->gt.timelines;
3116         struct i915_timeline *tl;
3117
3118         if (!READ_ONCE(i915->gt.active_requests))
3119                 return timeout;
3120
3121         mutex_lock(&gt->mutex);
3122         list_for_each_entry(tl, &gt->active_list, link) {
3123                 struct i915_request *rq;
3124
3125                 rq = i915_active_request_get_unlocked(&tl->last_request);
3126                 if (!rq)
3127                         continue;
3128
3129                 mutex_unlock(&gt->mutex);
3130
3131                 /*
3132                  * "Race-to-idle".
3133                  *
3134                  * Switching to the kernel context is often used as a synchronous
3135                  * step prior to idling, e.g. in suspend for flushing all
3136                  * current operations to memory before sleeping. These we
3137                  * want to complete as quickly as possible to avoid prolonged
3138                  * stalls, so allow the gpu to boost to maximum clocks.
3139                  */
3140                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
3141                         gen6_rps_boost(rq);
3142
3143                 timeout = i915_request_wait(rq, flags, timeout);
3144                 i915_request_put(rq);
3145                 if (timeout < 0)
3146                         return timeout;
3147
3148                 /* restart after reacquiring the lock */
3149                 mutex_lock(&gt->mutex);
3150                 tl = list_entry(&gt->active_list, typeof(*tl), link);
3151         }
3152         mutex_unlock(&gt->mutex);
3153
3154         return timeout;
3155 }
3156
3157 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
3158                            unsigned int flags, long timeout)
3159 {
3160         GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
3161                   flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
3162                   timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
3163
3164         /* If the device is asleep, we have no requests outstanding */
3165         if (!READ_ONCE(i915->gt.awake))
3166                 return 0;
3167
3168         timeout = wait_for_timelines(i915, flags, timeout);
3169         if (timeout < 0)
3170                 return timeout;
3171
3172         if (flags & I915_WAIT_LOCKED) {
3173                 int err;
3174
3175                 lockdep_assert_held(&i915->drm.struct_mutex);
3176
3177                 err = wait_for_engines(i915);
3178                 if (err)
3179                         return err;
3180
3181                 i915_retire_requests(i915);
3182         }
3183
3184         return 0;
3185 }
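
/*
 * Editor's sketch: the call pattern used for i915_gem_wait_for_idle() in
 * this file. With I915_WAIT_LOCKED set, struct_mutex must already be held
 * (see the lockdep assert above); the hypothetical example_quiesce() helper
 * and the choice of MAX_SCHEDULE_TIMEOUT are illustrative only.
 */
static int example_quiesce(struct drm_i915_private *i915)
{
        lockdep_assert_held(&i915->drm.struct_mutex);

        return i915_gem_wait_for_idle(i915,
                                      I915_WAIT_INTERRUPTIBLE |
                                      I915_WAIT_LOCKED,
                                      MAX_SCHEDULE_TIMEOUT);
}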
3186
3187 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
3188 {
3189         /*
3190          * We manually flush the CPU domain so that we can override and
3191          * force the flush for the display, and perform it asynchronously.
3192          */
3193         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3194         if (obj->cache_dirty)
3195                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
3196         obj->write_domain = 0;
3197 }
3198
3199 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3200 {
3201         if (!READ_ONCE(obj->pin_global))
3202                 return;
3203
3204         mutex_lock(&obj->base.dev->struct_mutex);
3205         __i915_gem_object_flush_for_display(obj);
3206         mutex_unlock(&obj->base.dev->struct_mutex);
3207 }
3208
3209 /**
3210  * Moves a single object to the WC read, and possibly write domain.
3211  * @obj: object to act on
3212  * @write: ask for write access or read only
3213  *
3214  * This function returns when the move is complete, including waiting on
3215  * flushes to occur.
3216  */
3217 int
3218 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3219 {
3220         int ret;
3221
3222         lockdep_assert_held(&obj->base.dev->struct_mutex);
3223
3224         ret = i915_gem_object_wait(obj,
3225                                    I915_WAIT_INTERRUPTIBLE |
3226                                    I915_WAIT_LOCKED |
3227                                    (write ? I915_WAIT_ALL : 0),
3228                                    MAX_SCHEDULE_TIMEOUT);
3229         if (ret)
3230                 return ret;
3231
3232         if (obj->write_domain == I915_GEM_DOMAIN_WC)
3233                 return 0;
3234
3235         /* Flush and acquire obj->pages so that we are coherent through
3236          * direct access in memory with previous cached writes through
3237          * shmemfs and that our cache domain tracking remains valid.
3238          * For example, if the obj->filp was moved to swap without us
3239          * being notified and releasing the pages, we would mistakenly
3240          * continue to assume that the obj remained out of the CPU cached
3241          * domain.
3242          */
3243         ret = i915_gem_object_pin_pages(obj);
3244         if (ret)
3245                 return ret;
3246
3247         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3248
3249         /* Serialise direct access to this object with the barriers for
3250          * coherent writes from the GPU, by effectively invalidating the
3251          * WC domain upon first access.
3252          */
3253         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3254                 mb();
3255
3256         /* It should now be out of any other write domains, and we can update
3257          * the domain values for our changes.
3258          */
3259         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3260         obj->read_domains |= I915_GEM_DOMAIN_WC;
3261         if (write) {
3262                 obj->read_domains = I915_GEM_DOMAIN_WC;
3263                 obj->write_domain = I915_GEM_DOMAIN_WC;
3264                 obj->mm.dirty = true;
3265         }
3266
3267         i915_gem_object_unpin_pages(obj);
3268         return 0;
3269 }
3270
3271 /**
3272  * Moves a single object to the GTT read, and possibly write domain.
3273  * @obj: object to act on
3274  * @write: ask for write access or read only
3275  *
3276  * This function returns when the move is complete, including waiting on
3277  * flushes to occur.
3278  */
3279 int
3280 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3281 {
3282         int ret;
3283
3284         lockdep_assert_held(&obj->base.dev->struct_mutex);
3285
3286         ret = i915_gem_object_wait(obj,
3287                                    I915_WAIT_INTERRUPTIBLE |
3288                                    I915_WAIT_LOCKED |
3289                                    (write ? I915_WAIT_ALL : 0),
3290                                    MAX_SCHEDULE_TIMEOUT);
3291         if (ret)
3292                 return ret;
3293
3294         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3295                 return 0;
3296
3297         /* Flush and acquire obj->pages so that we are coherent through
3298          * direct access in memory with previous cached writes through
3299          * shmemfs and that our cache domain tracking remains valid.
3300          * For example, if the obj->filp was moved to swap without us
3301          * being notified and releasing the pages, we would mistakenly
3302          * continue to assume that the obj remained out of the CPU cached
3303          * domain.
3304          */
3305         ret = i915_gem_object_pin_pages(obj);
3306         if (ret)
3307                 return ret;
3308
3309         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3310
3311         /* Serialise direct access to this object with the barriers for
3312          * coherent writes from the GPU, by effectively invalidating the
3313          * GTT domain upon first access.
3314          */
3315         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3316                 mb();
3317
3318         /* It should now be out of any other write domains, and we can update
3319          * the domain values for our changes.
3320          */
3321         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3322         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3323         if (write) {
3324                 obj->read_domains = I915_GEM_DOMAIN_GTT;
3325                 obj->write_domain = I915_GEM_DOMAIN_GTT;
3326                 obj->mm.dirty = true;
3327         }
3328
3329         i915_gem_object_unpin_pages(obj);
3330         return 0;
3331 }
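
/*
 * Editor's sketch: typical in-kernel use of the domain helpers above,
 * moving an object into the GTT write domain before touching it through
 * the aperture. struct_mutex must be held across the call (lockdep assert
 * above); the hypothetical example_prepare_gtt_write() exists only to
 * illustrate the locking and error-handling shape.
 */
static int example_prepare_gtt_write(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        int err;

        mutex_lock(&i915->drm.struct_mutex);
        err = i915_gem_object_set_to_gtt_domain(obj, true);
        mutex_unlock(&i915->drm.struct_mutex);

        return err;
}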
3332
3333 /**
3334  * Changes the cache-level of an object across all VMA.
3335  * @obj: object to act on
3336  * @cache_level: new cache level to set for the object
3337  *
3338  * After this function returns, the object will be in the new cache-level
3339  * across all GTT and the contents of the backing storage will be coherent,
3340  * with respect to the new cache-level. In order to keep the backing storage
3341  * coherent for all users, we only allow a single cache level to be set
3342  * globally on the object and prevent it from being changed whilst the
3343  * hardware is reading from the object. That is, if the object is currently
3344  * on the scanout it will be set to uncached (or equivalent display
3345  * cache coherency) and all non-MOCS GPU access will also be uncached so
3346  * that all direct access to the scanout remains coherent.
3347  */
3348 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3349                                     enum i915_cache_level cache_level)
3350 {
3351         struct i915_vma *vma;
3352         int ret;
3353
3354         lockdep_assert_held(&obj->base.dev->struct_mutex);
3355
3356         if (obj->cache_level == cache_level)
3357                 return 0;
3358
3359         /* Inspect the list of currently bound VMA and unbind any that would
3360          * be invalid given the new cache-level. This is principally to
3361          * catch the issue of the CS prefetch crossing page boundaries and
3362          * reading an invalid PTE on older architectures.
3363          */
3364 restart:
3365         list_for_each_entry(vma, &obj->vma.list, obj_link) {
3366                 if (!drm_mm_node_allocated(&vma->node))
3367                         continue;
3368
3369                 if (i915_vma_is_pinned(vma)) {
3370                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3371                         return -EBUSY;
3372                 }
3373
3374                 if (!i915_vma_is_closed(vma) &&
3375                     i915_gem_valid_gtt_space(vma, cache_level))
3376                         continue;
3377
3378                 ret = i915_vma_unbind(vma);
3379                 if (ret)
3380                         return ret;
3381
3382                 /* As unbinding may affect other elements in the
3383                  * obj->vma_list (due to side-effects from retiring
3384                  * an active vma), play safe and restart the iterator.
3385                  */
3386                 goto restart;
3387         }
3388
3389         /* We can reuse the existing drm_mm nodes but need to change the
3390          * cache-level on the PTE. We could simply unbind them all and
3391          * rebind with the correct cache-level on next use. However, since
3392          * we already have a valid slot, dma mapping, pages etc, we may as well
3393          * rewrite the PTE in the belief that doing so tramples upon less
3394          * state and so involves less work.
3395          */
3396         if (obj->bind_count) {
3397                 /* Before we change the PTE, the GPU must not be accessing it.
3398                  * If we wait upon the object, we know that all the bound
3399                  * VMA are no longer active.
3400                  */
3401                 ret = i915_gem_object_wait(obj,
3402                                            I915_WAIT_INTERRUPTIBLE |
3403                                            I915_WAIT_LOCKED |
3404                                            I915_WAIT_ALL,
3405                                            MAX_SCHEDULE_TIMEOUT);
3406                 if (ret)
3407                         return ret;
3408
3409                 if (!HAS_LLC(to_i915(obj->base.dev)) &&
3410                     cache_level != I915_CACHE_NONE) {
3411                         /* Access to snoopable pages through the GTT is
3412                          * incoherent and on some machines causes a hard
3413          * lockup. Relinquish the CPU mmapping to force
3414                          * userspace to refault in the pages and we can
3415                          * then double check if the GTT mapping is still
3416                          * valid for that pointer access.
3417                          */
3418                         i915_gem_release_mmap(obj);
3419
3420                         /* As we no longer need a fence for GTT access,
3421                          * we can relinquish it now (and so prevent having
3422                          * to steal a fence from someone else on the next
3423                          * fence request). Note GPU activity would have
3424                          * dropped the fence as all snoopable access is
3425                          * supposed to be linear.
3426                          */
3427                         for_each_ggtt_vma(vma, obj) {
3428                                 ret = i915_vma_put_fence(vma);
3429                                 if (ret)
3430                                         return ret;
3431                         }
3432                 } else {
3433                         /* We either have incoherent backing store and
3434                          * so no GTT access or the architecture is fully
3435                          * coherent. In such cases, existing GTT mmaps
3436                          * ignore the cache bit in the PTE and we can
3437                          * rewrite it without confusing the GPU or having
3438                          * to force userspace to fault back in its mmaps.
3439                          */
3440                 }
3441
3442                 list_for_each_entry(vma, &obj->vma.list, obj_link) {
3443                         if (!drm_mm_node_allocated(&vma->node))
3444                                 continue;
3445
3446                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3447                         if (ret)
3448                                 return ret;
3449                 }
3450         }
3451
3452         list_for_each_entry(vma, &obj->vma.list, obj_link)
3453                 vma->node.color = cache_level;
3454         i915_gem_object_set_cache_coherency(obj, cache_level);
3455         obj->cache_dirty = true; /* Always invalidate stale cachelines */
3456
3457         return 0;
3458 }
3459
3460 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3461                                struct drm_file *file)
3462 {
3463         struct drm_i915_gem_caching *args = data;
3464         struct drm_i915_gem_object *obj;
3465         int err = 0;
3466
3467         rcu_read_lock();
3468         obj = i915_gem_object_lookup_rcu(file, args->handle);
3469         if (!obj) {
3470                 err = -ENOENT;
3471                 goto out;
3472         }
3473
3474         switch (obj->cache_level) {
3475         case I915_CACHE_LLC:
3476         case I915_CACHE_L3_LLC:
3477                 args->caching = I915_CACHING_CACHED;
3478                 break;
3479
3480         case I915_CACHE_WT:
3481                 args->caching = I915_CACHING_DISPLAY;
3482                 break;
3483
3484         default:
3485                 args->caching = I915_CACHING_NONE;
3486                 break;
3487         }
3488 out:
3489         rcu_read_unlock();
3490         return err;
3491 }
3492
3493 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3494                                struct drm_file *file)
3495 {
3496         struct drm_i915_private *i915 = to_i915(dev);
3497         struct drm_i915_gem_caching *args = data;
3498         struct drm_i915_gem_object *obj;
3499         enum i915_cache_level level;
3500         int ret = 0;
3501
3502         switch (args->caching) {
3503         case I915_CACHING_NONE:
3504                 level = I915_CACHE_NONE;
3505                 break;
3506         case I915_CACHING_CACHED:
3507                 /*
3508                  * Due to a HW issue on BXT A stepping, GPU stores via a
3509                  * snooped mapping may leave stale data in a corresponding CPU
3510                  * cacheline, whereas normally such cachelines would get
3511                  * invalidated.
3512                  */
3513                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3514                         return -ENODEV;
3515
3516                 level = I915_CACHE_LLC;
3517                 break;
3518         case I915_CACHING_DISPLAY:
3519                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3520                 break;
3521         default:
3522                 return -EINVAL;
3523         }
3524
3525         obj = i915_gem_object_lookup(file, args->handle);
3526         if (!obj)
3527                 return -ENOENT;
3528
3529         /*
3530          * The caching mode of proxy object is handled by its generator, and
3531          * not allowed to be changed by userspace.
3532          */
3533         if (i915_gem_object_is_proxy(obj)) {
3534                 ret = -ENXIO;
3535                 goto out;
3536         }
3537
3538         if (obj->cache_level == level)
3539                 goto out;
3540
3541         ret = i915_gem_object_wait(obj,
3542                                    I915_WAIT_INTERRUPTIBLE,
3543                                    MAX_SCHEDULE_TIMEOUT);
3544         if (ret)
3545                 goto out;
3546
3547         ret = i915_mutex_lock_interruptible(dev);
3548         if (ret)
3549                 goto out;
3550
3551         ret = i915_gem_object_set_cache_level(obj, level);
3552         mutex_unlock(&dev->struct_mutex);
3553
3554 out:
3555         i915_gem_object_put(obj);
3556         return ret;
3557 }
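
/*
 * Editor's sketch (userspace side, not part of this file): exercising the
 * get/set caching ioctls implemented above. Assumes libdrm and the i915
 * uapi header; the hypothetical set_caching_example() helper is
 * illustrative only.
 */
#include <errno.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int set_caching_example(int fd, __u32 handle)
{
        struct drm_i915_gem_caching arg = {
                .handle = handle,
                .caching = I915_CACHING_CACHED, /* -ENODEV without LLC/snoop */
        };

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
                return -errno;

        /* Read it back; proxy objects reject changes with -ENXIO. */
        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg))
                return -errno;

        return arg.caching;     /* I915_CACHING_NONE/CACHED/DISPLAY */
}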
3558
3559 /*
3560  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
3561  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
3562  * (for pageflips). We only flush the caches while preparing the buffer for
3563  * display, the callers are responsible for frontbuffer flush.
3564  */
3565 struct i915_vma *
3566 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3567                                      u32 alignment,
3568                                      const struct i915_ggtt_view *view,
3569                                      unsigned int flags)
3570 {
3571         struct i915_vma *vma;
3572         int ret;
3573
3574         lockdep_assert_held(&obj->base.dev->struct_mutex);
3575
3576         /* Mark the global pin early so that we account for the
3577          * display coherency whilst setting up the cache domains.
3578          */
3579         obj->pin_global++;
3580
3581         /* The display engine is not coherent with the LLC cache on gen6.  As
3582          * a result, we make sure that the pinning that is about to occur is
3583          * done with uncached PTEs. This is the lowest common denominator for all
3584          * chipsets.
3585          *
3586          * However for gen6+, we could do better by using the GFDT bit instead
3587          * of uncaching, which would allow us to flush all the LLC-cached data
3588          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3589          */
3590         ret = i915_gem_object_set_cache_level(obj,
3591                                               HAS_WT(to_i915(obj->base.dev)) ?
3592                                               I915_CACHE_WT : I915_CACHE_NONE);
3593         if (ret) {
3594                 vma = ERR_PTR(ret);
3595                 goto err_unpin_global;
3596         }
3597
3598         /* As the user may map the buffer once pinned in the display plane
3599          * (e.g. libkms for the bootup splash), we have to ensure that we
3600          * always use map_and_fenceable for all scanout buffers. However,
3601          * it may simply be too big to fit into mappable, in which case
3602          * put it anyway and hope that userspace can cope (but always first
3603          * try to preserve the existing ABI).
3604          */
3605         vma = ERR_PTR(-ENOSPC);
3606         if ((flags & PIN_MAPPABLE) == 0 &&
3607             (!view || view->type == I915_GGTT_VIEW_NORMAL))
3608                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3609                                                flags |
3610                                                PIN_MAPPABLE |
3611                                                PIN_NONBLOCK);
3612         if (IS_ERR(vma))
3613                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3614         if (IS_ERR(vma))
3615                 goto err_unpin_global;
3616
3617         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3618
3619         __i915_gem_object_flush_for_display(obj);
3620
3621         /* It should now be out of any other write domains, and we can update
3622          * the domain values for our changes.
3623          */
3624         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3625
3626         return vma;
3627
3628 err_unpin_global:
3629         obj->pin_global--;
3630         return vma;
3631 }
3632
3633 void
3634 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3635 {
3636         lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3637
3638         if (WARN_ON(vma->obj->pin_global == 0))
3639                 return;
3640
3641         if (--vma->obj->pin_global == 0)
3642                 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3643
3644         /* Bump the LRU to try and avoid premature eviction whilst flipping  */
3645         i915_gem_object_bump_inactive_ggtt(vma->obj);
3646
3647         i915_vma_unpin(vma);
3648 }
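
/*
 * Editor's sketch: the pin/unpin pairing expected by the two display-plane
 * helpers above, as used when wiring a framebuffer to a plane. The
 * alignment, NULL (normal) view and flags are illustrative, and the
 * hypothetical example_pin_scanout() helper exists only for illustration;
 * struct_mutex must be held by the caller.
 */
static int example_pin_scanout(struct drm_i915_gem_object *obj)
{
        struct i915_vma *vma;

        lockdep_assert_held(&obj->base.dev->struct_mutex);

        vma = i915_gem_object_pin_to_display_plane(obj, 4096, NULL,
                                                   PIN_MAPPABLE);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        /* ... program the plane using i915_ggtt_offset(vma) ... */

        i915_gem_object_unpin_from_display_plane(vma);
        return 0;
}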
3649
3650 /**
3651  * Moves a single object to the CPU read, and possibly write domain.
3652  * @obj: object to act on
3653  * @write: requesting write or read-only access
3654  *
3655  * This function returns when the move is complete, including waiting on
3656  * flushes to occur.
3657  */
3658 int
3659 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3660 {
3661         int ret;
3662
3663         lockdep_assert_held(&obj->base.dev->struct_mutex);
3664
3665         ret = i915_gem_object_wait(obj,
3666                                    I915_WAIT_INTERRUPTIBLE |
3667                                    I915_WAIT_LOCKED |
3668                                    (write ? I915_WAIT_ALL : 0),
3669                                    MAX_SCHEDULE_TIMEOUT);
3670         if (ret)
3671                 return ret;
3672
3673         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3674
3675         /* Flush the CPU cache if it's still invalid. */
3676         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3677                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3678                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3679         }
3680
3681         /* It should now be out of any other write domains, and we can update
3682          * the domain values for our changes.
3683          */
3684         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3685
3686         /* If we're writing through the CPU, then the GPU read domains will
3687          * need to be invalidated at next use.
3688          */
3689         if (write)
3690                 __start_cpu_write(obj);
3691
3692         return 0;
3693 }
3694
3695 /* Throttle our rendering by waiting until the ring has completed our requests
3696  * emitted over 20 msec ago.
3697  *
3698  * Note that if we were to use the current jiffies each time around the loop,
3699  * we wouldn't escape the function with any frames outstanding if the time to
3700  * render a frame was over 20ms.
3701  *
3702  * This should get us reasonable parallelism between CPU and GPU but also
3703  * relatively low latency when blocking on a particular request to finish.
3704  */
3705 static int
3706 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3707 {
3708         struct drm_i915_private *dev_priv = to_i915(dev);
3709         struct drm_i915_file_private *file_priv = file->driver_priv;
3710         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3711         struct i915_request *request, *target = NULL;
3712         long ret;
3713
3714         /* ABI: return -EIO if already wedged */
3715         ret = i915_terminally_wedged(dev_priv);
3716         if (ret)
3717                 return ret;
3718
3719         spin_lock(&file_priv->mm.lock);
3720         list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
3721                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3722                         break;
3723
3724                 if (target) {
3725                         list_del(&target->client_link);
3726                         target->file_priv = NULL;
3727                 }
3728
3729                 target = request;
3730         }
3731         if (target)
3732                 i915_request_get(target);
3733         spin_unlock(&file_priv->mm.lock);
3734
3735         if (target == NULL)
3736                 return 0;
3737
3738         ret = i915_request_wait(target,
3739                                 I915_WAIT_INTERRUPTIBLE,
3740                                 MAX_SCHEDULE_TIMEOUT);
3741         i915_request_put(target);
3742
3743         return ret < 0 ? ret : 0;
3744 }
3745
3746 struct i915_vma *
3747 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3748                          const struct i915_ggtt_view *view,
3749                          u64 size,
3750                          u64 alignment,
3751                          u64 flags)
3752 {
3753         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3754         struct i915_address_space *vm = &dev_priv->ggtt.vm;
3755         struct i915_vma *vma;
3756         int ret;
3757
3758         lockdep_assert_held(&obj->base.dev->struct_mutex);
3759
3760         if (flags & PIN_MAPPABLE &&
3761             (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3762                 /* If the required space is larger than the available
3763                  * aperture, we will not be able to find a slot for the
3764                  * object and unbinding the object now will be in
3765                  * vain. Worse, doing so may cause us to ping-pong
3766                  * the object in and out of the Global GTT and
3767                  * waste a lot of cycles under the mutex.
3768                  */
3769                 if (obj->base.size > dev_priv->ggtt.mappable_end)
3770                         return ERR_PTR(-E2BIG);
3771
3772                 /* If NONBLOCK is set the caller is optimistically
3773                  * trying to cache the full object within the mappable
3774                  * aperture, and *must* have a fallback in place for
3775                  * situations where we cannot bind the object. We
3776                  * can be a little more lax here and use the fallback
3777                  * more often to avoid costly migrations of ourselves
3778                  * and other objects within the aperture.
3779                  *
3780                  * Half-the-aperture is used as a simple heuristic.
3781                  * More interesting would to do search for a free
3782                  * More interesting would be to do a search for a free
3783                  * That caters for the self-harm case, and with a
3784                  * little more heuristics (e.g. NOFAULT, NOEVICT)
3785                  * we could try to minimise harm to others.
3786                  */
3787                 if (flags & PIN_NONBLOCK &&
3788                     obj->base.size > dev_priv->ggtt.mappable_end / 2)
3789                         return ERR_PTR(-ENOSPC);
3790         }
3791
3792         vma = i915_vma_instance(obj, vm, view);
3793         if (IS_ERR(vma))
3794                 return vma;
3795
3796         if (i915_vma_misplaced(vma, size, alignment, flags)) {
3797                 if (flags & PIN_NONBLOCK) {
3798                         if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3799                                 return ERR_PTR(-ENOSPC);
3800
3801                         if (flags & PIN_MAPPABLE &&
3802                             vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3803                                 return ERR_PTR(-ENOSPC);
3804                 }
3805
3806                 WARN(i915_vma_is_pinned(vma),
3807                      "bo is already pinned in ggtt with incorrect alignment:"
3808                      " offset=%08x, req.alignment=%llx,"
3809                      " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3810                      i915_ggtt_offset(vma), alignment,
3811                      !!(flags & PIN_MAPPABLE),
3812                      i915_vma_is_map_and_fenceable(vma));
3813                 ret = i915_vma_unbind(vma);
3814                 if (ret)
3815                         return ERR_PTR(ret);
3816         }
3817
3818         ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3819         if (ret)
3820                 return ERR_PTR(ret);
3821
3822         return vma;
3823 }
3824
3825 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3826 {
3827         if (id == I915_ENGINE_CLASS_INVALID)
3828                 return 0xffff0000;
3829
3830         GEM_BUG_ON(id >= 16);
3831         return 0x10000 << id;
3832 }
3833
3834 static __always_inline unsigned int __busy_write_id(unsigned int id)
3835 {
3836         /*
3837          * The uABI guarantees an active writer is also amongst the read
3838          * engines. This would be true if we accessed the activity tracking
3839          * under the lock, but as we perform the lookup of the object and
3840          * its activity locklessly we can not guarantee that the last_write
3841          * being active implies that we have set the same engine flag from
3842          * last_read - hence we always set both read and write busy for
3843          * last_write.
3844          */
3845         if (id == I915_ENGINE_CLASS_INVALID)
3846                 return 0xffffffff;
3847
3848         return (id + 1) | __busy_read_flag(id);
3849 }
3850
3851 static __always_inline unsigned int
3852 __busy_set_if_active(const struct dma_fence *fence,
3853                      unsigned int (*flag)(unsigned int id))
3854 {
3855         const struct i915_request *rq;
3856
3857         /*
3858          * We have to check the current hw status of the fence as the uABI
3859          * guarantees forward progress. We could rely on the idle worker
3860          * to eventually flush us, but to minimise latency just ask the
3861          * hardware.
3862          *
3863          * Note we only report on the status of native fences.
3864          */
3865         if (!dma_fence_is_i915(fence))
3866                 return 0;
3867
3868         /* opencode to_request() in order to avoid const warnings */
3869         rq = container_of(fence, const struct i915_request, fence);
3870         if (i915_request_completed(rq))
3871                 return 0;
3872
3873         return flag(rq->engine->uabi_class);
3874 }
3875
3876 static __always_inline unsigned int
3877 busy_check_reader(const struct dma_fence *fence)
3878 {
3879         return __busy_set_if_active(fence, __busy_read_flag);
3880 }
3881
3882 static __always_inline unsigned int
3883 busy_check_writer(const struct dma_fence *fence)
3884 {
3885         if (!fence)
3886                 return 0;
3887
3888         return __busy_set_if_active(fence, __busy_write_id);
3889 }
3890
3891 int
3892 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3893                     struct drm_file *file)
3894 {
3895         struct drm_i915_gem_busy *args = data;
3896         struct drm_i915_gem_object *obj;
3897         struct reservation_object_list *list;
3898         unsigned int seq;
3899         int err;
3900
3901         err = -ENOENT;
3902         rcu_read_lock();
3903         obj = i915_gem_object_lookup_rcu(file, args->handle);
3904         if (!obj)
3905                 goto out;
3906
3907         /*
3908          * A discrepancy here is that we do not report the status of
3909          * non-i915 fences, i.e. even though we may report the object as idle,
3910          * a call to set-domain may still stall waiting for foreign rendering.
3911          * This also means that wait-ioctl may report an object as busy,
3912          * where busy-ioctl considers it idle.
3913          *
3914          * We trade the ability to warn of foreign fences to report on which
3915          * i915 engines are active for the object.
3916          *
3917          * Alternatively, we can trade that extra information on read/write
3918          * activity with
3919          *      args->busy =
3920          *              !reservation_object_test_signaled_rcu(obj->resv, true);
3921          * to report the overall busyness. This is what the wait-ioctl does.
3922          *
3923          */
3924 retry:
3925         seq = raw_read_seqcount(&obj->resv->seq);
3926
3927         /* Translate the exclusive fence to the READ *and* WRITE engine */
3928         args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3929
3930         /* Translate shared fences to READ set of engines */
3931         list = rcu_dereference(obj->resv->fence);
3932         if (list) {
3933                 unsigned int shared_count = list->shared_count, i;
3934
3935                 for (i = 0; i < shared_count; ++i) {
3936                         struct dma_fence *fence =
3937                                 rcu_dereference(list->shared[i]);
3938
3939                         args->busy |= busy_check_reader(fence);
3940                 }
3941         }
3942
3943         if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3944                 goto retry;
3945
3946         err = 0;
3947 out:
3948         rcu_read_unlock();
3949         return err;
3950 }
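
/*
 * Editor's sketch (userspace side, not part of this file): decoding the
 * busy ioctl result produced above. The low 16 bits carry the last
 * writer's engine class + 1 and the high 16 bits a bitmask of reading
 * engine classes (see __busy_read_flag()/__busy_write_id()). Assumes
 * libdrm and the i915 uapi header; the hypothetical busy_example() helper
 * is illustrative only.
 */
#include <errno.h>
#include <stdio.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int busy_example(int fd, __u32 handle)
{
        struct drm_i915_gem_busy busy = { .handle = handle };

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
                return -errno;

        if (!busy.busy)
                return 0;                       /* idle (for i915 fences) */

        if (busy.busy & 0xffff)
                printf("last writer: engine class %u\n",
                       (busy.busy & 0xffff) - 1);
        printf("readers: engine class mask %#x\n", busy.busy >> 16);
        return 1;
}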
3951
3952 int
3953 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3954                         struct drm_file *file_priv)
3955 {
3956         return i915_gem_ring_throttle(dev, file_priv);
3957 }
3958
3959 int
3960 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3961                        struct drm_file *file_priv)
3962 {
3963         struct drm_i915_private *dev_priv = to_i915(dev);
3964         struct drm_i915_gem_madvise *args = data;
3965         struct drm_i915_gem_object *obj;
3966         int err;
3967
3968         switch (args->madv) {
3969         case I915_MADV_DONTNEED:
3970         case I915_MADV_WILLNEED:
3971             break;
3972         default:
3973             return -EINVAL;
3974         }
3975
3976         obj = i915_gem_object_lookup(file_priv, args->handle);
3977         if (!obj)
3978                 return -ENOENT;
3979
3980         err = mutex_lock_interruptible(&obj->mm.lock);
3981         if (err)
3982                 goto out;
3983
3984         if (i915_gem_object_has_pages(obj) &&
3985             i915_gem_object_is_tiled(obj) &&
3986             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3987                 if (obj->mm.madv == I915_MADV_WILLNEED) {
3988                         GEM_BUG_ON(!obj->mm.quirked);
3989                         __i915_gem_object_unpin_pages(obj);
3990                         obj->mm.quirked = false;
3991                 }
3992                 if (args->madv == I915_MADV_WILLNEED) {
3993                         GEM_BUG_ON(obj->mm.quirked);
3994                         __i915_gem_object_pin_pages(obj);
3995                         obj->mm.quirked = true;
3996                 }
3997         }
3998
3999         if (obj->mm.madv != __I915_MADV_PURGED)
4000                 obj->mm.madv = args->madv;
4001
4002         /* if the object is no longer attached, discard its backing storage */
4003         if (obj->mm.madv == I915_MADV_DONTNEED &&
4004             !i915_gem_object_has_pages(obj))
4005                 i915_gem_object_truncate(obj);
4006
4007         args->retained = obj->mm.madv != __I915_MADV_PURGED;
4008         mutex_unlock(&obj->mm.lock);
4009
4010 out:
4011         i915_gem_object_put(obj);
4012         return err;
4013 }
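
/*
 * Editor's sketch (userspace side, not part of this file): marking a
 * buffer purgeable with the madvise ioctl above and later reclaiming it.
 * Assumes libdrm and the i915 uapi header; the hypothetical
 * madvise_example() helper is illustrative only.
 */
#include <errno.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int madvise_example(int fd, __u32 handle)
{
        struct drm_i915_gem_madvise arg = {
                .handle = handle,
                .madv = I915_MADV_DONTNEED,     /* shrinker may purge pages */
        };

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
                return -errno;

        /* Before reuse, flip back and check the backing store survived. */
        arg.madv = I915_MADV_WILLNEED;
        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
                return -errno;

        return arg.retained;    /* 0 means the contents were purged */
}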
4014
4015 static void
4016 frontbuffer_retire(struct i915_active_request *active,
4017                    struct i915_request *request)
4018 {
4019         struct drm_i915_gem_object *obj =
4020                 container_of(active, typeof(*obj), frontbuffer_write);
4021
4022         intel_fb_obj_flush(obj, ORIGIN_CS);
4023 }
4024
4025 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4026                           const struct drm_i915_gem_object_ops *ops)
4027 {
4028         mutex_init(&obj->mm.lock);
4029
4030         spin_lock_init(&obj->vma.lock);
4031         INIT_LIST_HEAD(&obj->vma.list);
4032
4033         INIT_LIST_HEAD(&obj->lut_list);
4034         INIT_LIST_HEAD(&obj->batch_pool_link);
4035
4036         init_rcu_head(&obj->rcu);
4037
4038         obj->ops = ops;
4039
4040         reservation_object_init(&obj->__builtin_resv);
4041         obj->resv = &obj->__builtin_resv;
4042
4043         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4044         i915_active_request_init(&obj->frontbuffer_write,
4045                                  NULL, frontbuffer_retire);
4046
4047         obj->mm.madv = I915_MADV_WILLNEED;
4048         INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4049         mutex_init(&obj->mm.get_page.lock);
4050
4051         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4052 }
4053
4054 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4055         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4056                  I915_GEM_OBJECT_IS_SHRINKABLE,
4057
4058         .get_pages = i915_gem_object_get_pages_gtt,
4059         .put_pages = i915_gem_object_put_pages_gtt,
4060
4061         .pwrite = i915_gem_object_pwrite_gtt,
4062 };
4063
4064 static int i915_gem_object_create_shmem(struct drm_device *dev,
4065                                         struct drm_gem_object *obj,
4066                                         size_t size)
4067 {
4068         struct drm_i915_private *i915 = to_i915(dev);
4069         unsigned long flags = VM_NORESERVE;
4070         struct file *filp;
4071
4072         drm_gem_private_object_init(dev, obj, size);
4073
4074         if (i915->mm.gemfs)
4075                 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
4076                                                  flags);
4077         else
4078                 filp = shmem_file_setup("i915", size, flags);
4079
4080         if (IS_ERR(filp))
4081                 return PTR_ERR(filp);
4082
4083         obj->filp = filp;
4084
4085         return 0;
4086 }
4087
4088 struct drm_i915_gem_object *
4089 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4090 {
4091         struct drm_i915_gem_object *obj;
4092         struct address_space *mapping;
4093         unsigned int cache_level;
4094         gfp_t mask;
4095         int ret;
4096
4097         /* There is a prevalence of the assumption that we fit the object's
4098          * page count inside a 32bit _signed_ variable. Let's document this and
4099          * catch if we ever need to fix it. In the meantime, if you do spot
4100          * such a local variable, please consider fixing!
4101          */
4102         if (size >> PAGE_SHIFT > INT_MAX)
4103                 return ERR_PTR(-E2BIG);
4104
4105         if (overflows_type(size, obj->base.size))
4106                 return ERR_PTR(-E2BIG);
4107
4108         obj = i915_gem_object_alloc();
4109         if (obj == NULL)
4110                 return ERR_PTR(-ENOMEM);
4111
4112         ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
4113         if (ret)
4114                 goto fail;
4115
4116         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4117         if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
4118                 /* 965gm cannot relocate objects above 4GiB. */
4119                 mask &= ~__GFP_HIGHMEM;
4120                 mask |= __GFP_DMA32;
4121         }
4122
4123         mapping = obj->base.filp->f_mapping;
4124         mapping_set_gfp_mask(mapping, mask);
4125         GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
4126
4127         i915_gem_object_init(obj, &i915_gem_object_ops);
4128
4129         obj->write_domain = I915_GEM_DOMAIN_CPU;
4130         obj->read_domains = I915_GEM_DOMAIN_CPU;
4131
4132         if (HAS_LLC(dev_priv))
4133                 /* On some devices, we can have the GPU use the LLC (the CPU
4134                  * cache) for about a 10% performance improvement
4135                  * compared to uncached.  Graphics requests other than
4136                  * display scanout are coherent with the CPU in
4137                  * accessing this cache.  This means in this mode we
4138                  * don't need to clflush on the CPU side, and on the
4139                  * GPU side we only need to flush internal caches to
4140                  * get data visible to the CPU.
4141                  *
4142                  * However, we maintain the display planes as UC, and so
4143                  * need to rebind when first used as such.
4144                  */
4145                 cache_level = I915_CACHE_LLC;
4146         else
4147                 cache_level = I915_CACHE_NONE;
4148
4149         i915_gem_object_set_cache_coherency(obj, cache_level);
4150
4151         trace_i915_gem_object_create(obj);
4152
4153         return obj;
4154
4155 fail:
4156         i915_gem_object_free(obj);
4157         return ERR_PTR(ret);
4158 }
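
/*
 * Editor's sketch: minimal in-kernel use of i915_gem_object_create() as
 * defined above. The object is released with i915_gem_object_put(); the
 * actual free is deferred via RCU and the free worker below. The
 * hypothetical example_create_object() helper is illustrative only.
 */
static int example_create_object(struct drm_i915_private *i915)
{
        struct drm_i915_gem_object *obj;

        obj = i915_gem_object_create(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        /* ... bind, map or otherwise use the object ... */

        i915_gem_object_put(obj);
        return 0;
}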
4159
4160 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4161 {
4162         /* If we are the last user of the backing storage (be it shmemfs
4163          * pages or stolen etc), we know that the pages are going to be
4164          * immediately released. In this case, we can then skip copying
4165          * back the contents from the GPU.
4166          */
4167
4168         if (obj->mm.madv != I915_MADV_WILLNEED)
4169                 return false;
4170
4171         if (obj->base.filp == NULL)
4172                 return true;
4173
4174         /* At first glance, this looks racy, but then again so would be
4175          * userspace racing mmap against close. However, the first external
4176          * reference to the filp can only be obtained through the
4177          * i915_gem_mmap_ioctl() which safeguards us against the user
4178          * acquiring such a reference whilst we are in the middle of
4179          * freeing the object.
4180          */
4181         return atomic_long_read(&obj->base.filp->f_count) == 1;
4182 }
4183
4184 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4185                                     struct llist_node *freed)
4186 {
4187         struct drm_i915_gem_object *obj, *on;
4188         intel_wakeref_t wakeref;
4189
4190         wakeref = intel_runtime_pm_get(i915);
4191         llist_for_each_entry_safe(obj, on, freed, freed) {
4192                 struct i915_vma *vma, *vn;
4193
4194                 trace_i915_gem_object_destroy(obj);
4195
4196                 mutex_lock(&i915->drm.struct_mutex);
4197
4198                 GEM_BUG_ON(i915_gem_object_is_active(obj));
4199                 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
4200                         GEM_BUG_ON(i915_vma_is_active(vma));
4201                         vma->flags &= ~I915_VMA_PIN_MASK;
4202                         i915_vma_destroy(vma);
4203                 }
4204                 GEM_BUG_ON(!list_empty(&obj->vma.list));
4205                 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
4206
4207                 /* This serializes freeing with the shrinker. Since the free
4208                  * is delayed, first by RCU then by the workqueue, we want the
4209                  * shrinker to be able to free pages of unreferenced objects,
4210                  * or else we may oom whilst there are plenty of deferred
4211                  * freed objects.
4212                  */
4213                 if (i915_gem_object_has_pages(obj)) {
4214                         spin_lock(&i915->mm.obj_lock);
4215                         list_del_init(&obj->mm.link);
4216                         spin_unlock(&i915->mm.obj_lock);
4217                 }
4218
4219                 mutex_unlock(&i915->drm.struct_mutex);
4220
4221                 GEM_BUG_ON(obj->bind_count);
4222                 GEM_BUG_ON(obj->userfault_count);
4223                 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4224                 GEM_BUG_ON(!list_empty(&obj->lut_list));
4225
4226                 if (obj->ops->release)
4227                         obj->ops->release(obj);
4228
4229                 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4230                         atomic_set(&obj->mm.pages_pin_count, 0);
4231                 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4232                 GEM_BUG_ON(i915_gem_object_has_pages(obj));
4233
4234                 if (obj->base.import_attach)
4235                         drm_prime_gem_destroy(&obj->base, NULL);
4236
4237                 reservation_object_fini(&obj->__builtin_resv);
4238                 drm_gem_object_release(&obj->base);
4239                 i915_gem_info_remove_obj(i915, obj->base.size);
4240
4241                 bitmap_free(obj->bit_17);
4242                 i915_gem_object_free(obj);
4243
4244                 GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4245                 atomic_dec(&i915->mm.free_count);
4246
4247                 if (on)
4248                         cond_resched();
4249         }
4250         intel_runtime_pm_put(i915, wakeref);
4251 }
4252
4253 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4254 {
4255         struct llist_node *freed;
4256
4257         /* Free the oldest, most stale object to keep the free_list short */
4258         freed = NULL;
4259         if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4260                 /* Only one consumer of llist_del_first() allowed */
4261                 spin_lock(&i915->mm.free_lock);
4262                 freed = llist_del_first(&i915->mm.free_list);
4263                 spin_unlock(&i915->mm.free_lock);
4264         }
4265         if (unlikely(freed)) {
4266                 freed->next = NULL;
4267                 __i915_gem_free_objects(i915, freed);
4268         }
4269 }
4270
4271 static void __i915_gem_free_work(struct work_struct *work)
4272 {
4273         struct drm_i915_private *i915 =
4274                 container_of(work, struct drm_i915_private, mm.free_work);
4275         struct llist_node *freed;
4276
4277         /*
4278          * All file-owned VMA should have been released by this point through
4279          * i915_gem_close_object(), or earlier by i915_gem_context_close().
4280          * However, the object may also be bound into the global GTT (e.g.
4281          * older GPUs without per-process support, or for direct access through
4282          * the GTT either for the user or for scanout). Those VMA still need to
4283          * be unbound now.
4284          */
4285
4286         spin_lock(&i915->mm.free_lock);
4287         while ((freed = llist_del_all(&i915->mm.free_list))) {
4288                 spin_unlock(&i915->mm.free_lock);
4289
4290                 __i915_gem_free_objects(i915, freed);
4291                 if (need_resched())
4292                         return;
4293
4294                 spin_lock(&i915->mm.free_lock);
4295         }
4296         spin_unlock(&i915->mm.free_lock);
4297 }
4298
4299 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4300 {
4301         struct drm_i915_gem_object *obj =
4302                 container_of(head, typeof(*obj), rcu);
4303         struct drm_i915_private *i915 = to_i915(obj->base.dev);
4304
4305         /*
4306          * We reuse obj->rcu for the freed list, so we had better not treat
4307          * it like a rcu_head from this point forwards. And we expect all
4308          * objects to be freed via this path.
4309          */
4310         destroy_rcu_head(&obj->rcu);
4311
4312         /*
4313          * Since we require blocking on struct_mutex to unbind the freed
4314          * object from the GPU before releasing resources back to the
4315          * system, we can not do that directly from the RCU callback (which may
4316          * be a softirq context), but must instead then defer that work onto a
4317          * kthread. We use the RCU callback rather than move the freed object
4318          * directly onto the work queue so that we can mix between using the
4319          * worker and performing frees directly from subsequent allocations for
4320          * crude but effective memory throttling.
4321          */
4322         if (llist_add(&obj->freed, &i915->mm.free_list))
4323                 queue_work(i915->wq, &i915->mm.free_work);
4324 }
4325
4326 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4327 {
4328         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4329
4330         if (obj->mm.quirked)
4331                 __i915_gem_object_unpin_pages(obj);
4332
4333         if (discard_backing_storage(obj))
4334                 obj->mm.madv = I915_MADV_DONTNEED;
4335
4336         /*
4337          * Before we free the object, make sure any pure RCU-only
4338          * read-side critical sections are complete, e.g.
4339          * i915_gem_busy_ioctl(). For the corresponding synchronized
4340          * lookup see i915_gem_object_lookup_rcu().
4341          */
4342         atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4343         call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4344 }
4345
4346 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4347 {
4348         lockdep_assert_held(&obj->base.dev->struct_mutex);
4349
4350         if (!i915_gem_object_has_active_reference(obj) &&
4351             i915_gem_object_is_active(obj))
4352                 i915_gem_object_set_active_reference(obj);
4353         else
4354                 i915_gem_object_put(obj);
4355 }
4356
4357 void i915_gem_sanitize(struct drm_i915_private *i915)
4358 {
4359         intel_wakeref_t wakeref;
4360
4361         GEM_TRACE("\n");
4362
4363         wakeref = intel_runtime_pm_get(i915);
4364         intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4365
4366         /*
4367          * As we have just resumed the machine and woken the device up from
4368          * deep PCI sleep (presumably D3_cold), assume the HW has been reset
4369          * back to defaults, recovering from whatever wedged state we left it
4370          * in and so worth trying to use the device once more.
4371          */
4372         if (i915_terminally_wedged(i915))
4373                 i915_gem_unset_wedged(i915);
4374
4375         /*
4376          * If we inherit context state from the BIOS or earlier occupants
4377          * of the GPU, the GPU may be in an inconsistent state when we
4378          * try to take over. The only way to remove the earlier state
4379          * is by resetting. However, resetting on earlier gen is tricky as
4380          * it may impact the display and we are uncertain about the stability
4381          * of the reset, so in principle this could be applied to even earlier gens.
4382          */
4383         intel_engines_sanitize(i915, false);
4384
4385         intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4386         intel_runtime_pm_put(i915, wakeref);
4387
4388         mutex_lock(&i915->drm.struct_mutex);
4389         i915_gem_contexts_lost(i915);
4390         mutex_unlock(&i915->drm.struct_mutex);
4391 }
4392
4393 void i915_gem_suspend(struct drm_i915_private *i915)
4394 {
4395         intel_wakeref_t wakeref;
4396
4397         GEM_TRACE("\n");
4398
4399         wakeref = intel_runtime_pm_get(i915);
4400         intel_suspend_gt_powersave(i915);
4401
4402         flush_workqueue(i915->wq);
4403
4404         mutex_lock(&i915->drm.struct_mutex);
4405
4406         /*
4407          * We have to flush all the executing contexts to main memory so
4408          * that they can be saved in the hibernation image. To ensure the last
4409          * context image is coherent, we have to switch away from it. That
4410          * leaves the i915->kernel_context still active when
4411          * we actually suspend, and its image in memory may not match the GPU
4412          * state. Fortunately, the kernel_context is disposable and we do
4413          * not rely on its state.
4414          */
4415         switch_to_kernel_context_sync(i915, i915->gt.active_engines);
4416
4417         mutex_unlock(&i915->drm.struct_mutex);
4418         i915_reset_flush(i915);
4419
4420         drain_delayed_work(&i915->gt.retire_work);
4421
4422         /*
4423          * As the idle_work rearms itself if it detects a race, play safe and
4424          * repeat the flush until it is definitely idle.
4425          */
4426         drain_delayed_work(&i915->gt.idle_work);
4427
4428         /*
4429          * Assert that we successfully flushed all the work and
4430          * reset the GPU back to its idle, low power state.
4431          */
4432         GEM_BUG_ON(i915->gt.awake);
4433
4434         intel_uc_suspend(i915);
4435
4436         intel_runtime_pm_put(i915, wakeref);
4437 }
4438
4439 void i915_gem_suspend_late(struct drm_i915_private *i915)
4440 {
4441         struct drm_i915_gem_object *obj;
4442         struct list_head *phases[] = {
4443                 &i915->mm.unbound_list,
4444                 &i915->mm.bound_list,
4445                 NULL
4446         }, **phase;
4447
4448         /*
4449          * Neither the BIOS, ourselves, nor any other kernel
4450          * expects the system to be in execlists mode on startup,
4451          * so we need to reset the GPU back to legacy mode. And the only
4452          * known way to disable logical contexts is through a GPU reset.
4453          *
4454          * So in order to leave the system in a known default configuration,
4455          * always reset the GPU upon unload and suspend. Afterwards we then
4456          * clean up the GEM state tracking, flushing off the requests and
4457          * leaving the system in a known idle state.
4458          *
4459          * Note that it is of the utmost importance that the GPU is idle and
4460          * all stray writes are flushed *before* we dismantle the backing
4461          * storage for the pinned objects.
4462          *
4463          * However, since we are uncertain that resetting the GPU on older
4464          * machines is a good idea, we don't - just in case it leaves the
4465          * machine in an unusable condition.
4466          */
4467
4468         mutex_lock(&i915->drm.struct_mutex);
4469         for (phase = phases; *phase; phase++) {
4470                 list_for_each_entry(obj, *phase, mm.link)
4471                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
4472         }
4473         mutex_unlock(&i915->drm.struct_mutex);
4474
4475         intel_uc_sanitize(i915);
4476         i915_gem_sanitize(i915);
4477 }
4478
4479 void i915_gem_resume(struct drm_i915_private *i915)
4480 {
4481         GEM_TRACE("\n");
4482
4483         WARN_ON(i915->gt.awake);
4484
4485         mutex_lock(&i915->drm.struct_mutex);
4486         intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4487
4488         i915_gem_restore_gtt_mappings(i915);
4489         i915_gem_restore_fences(i915);
4490
4491         /*
4492          * As we didn't flush the kernel context before suspend, we cannot
4493          * guarantee that the context image is complete. So let's just reset
4494          * it and start again.
4495          */
4496         i915->gt.resume(i915);
4497
4498         if (i915_gem_init_hw(i915))
4499                 goto err_wedged;
4500
4501         intel_uc_resume(i915);
4502
4503         /* Always reload a context for powersaving. */
4504         if (!load_power_context(i915))
4505                 goto err_wedged;
4506
4507 out_unlock:
4508         intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4509         mutex_unlock(&i915->drm.struct_mutex);
4510         return;
4511
4512 err_wedged:
4513         if (!i915_reset_failed(i915)) {
4514                 dev_err(i915->drm.dev,
4515                         "Failed to re-initialize GPU, declaring it wedged!\n");
4516                 i915_gem_set_wedged(i915);
4517         }
4518         goto out_unlock;
4519 }
4520
4521 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4522 {
4523         if (INTEL_GEN(dev_priv) < 5 ||
4524             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4525                 return;
4526
4527         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4528                                  DISP_TILE_SURFACE_SWIZZLING);
4529
4530         if (IS_GEN(dev_priv, 5))
4531                 return;
4532
4533         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4534         if (IS_GEN(dev_priv, 6))
4535                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4536         else if (IS_GEN(dev_priv, 7))
4537                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4538         else if (IS_GEN(dev_priv, 8))
4539                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4540         else
4541                 BUG();
4542 }
4543
4544 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4545 {
4546         I915_WRITE(RING_CTL(base), 0);
4547         I915_WRITE(RING_HEAD(base), 0);
4548         I915_WRITE(RING_TAIL(base), 0);
4549         I915_WRITE(RING_START(base), 0);
4550 }
4551
4552 static void init_unused_rings(struct drm_i915_private *dev_priv)
4553 {
4554         if (IS_I830(dev_priv)) {
4555                 init_unused_ring(dev_priv, PRB1_BASE);
4556                 init_unused_ring(dev_priv, SRB0_BASE);
4557                 init_unused_ring(dev_priv, SRB1_BASE);
4558                 init_unused_ring(dev_priv, SRB2_BASE);
4559                 init_unused_ring(dev_priv, SRB3_BASE);
4560         } else if (IS_GEN(dev_priv, 2)) {
4561                 init_unused_ring(dev_priv, SRB0_BASE);
4562                 init_unused_ring(dev_priv, SRB1_BASE);
4563         } else if (IS_GEN(dev_priv, 3)) {
4564                 init_unused_ring(dev_priv, PRB1_BASE);
4565                 init_unused_ring(dev_priv, PRB2_BASE);
4566         }
4567 }
4568
4569 static int __i915_gem_restart_engines(void *data)
4570 {
4571         struct drm_i915_private *i915 = data;
4572         struct intel_engine_cs *engine;
4573         enum intel_engine_id id;
4574         int err;
4575
4576         for_each_engine(engine, i915, id) {
4577                 err = engine->init_hw(engine);
4578                 if (err) {
4579                         DRM_ERROR("Failed to restart %s (%d)\n",
4580                                   engine->name, err);
4581                         return err;
4582                 }
4583         }
4584
4585         intel_engines_set_scheduler_caps(i915);
4586
4587         return 0;
4588 }
4589
4590 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
4591 {
4592         int ret;
4593
4594         dev_priv->gt.last_init_time = ktime_get();
4595
4596         /* Double layer security blanket, see i915_gem_init() */
4597         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4598
4599         if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4600                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4601
4602         if (IS_HASWELL(dev_priv))
4603                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4604                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4605
4606         /* Apply the GT workarounds... */
4607         intel_gt_apply_workarounds(dev_priv);
4608         /* ...and determine whether they are sticking. */
4609         intel_gt_verify_workarounds(dev_priv, "init");
4610
4611         i915_gem_init_swizzling(dev_priv);
4612
4613         /*
4614          * At least 830 can leave some of the unused rings
4615          * "active" (i.e. head != tail) after resume, which
4616          * will prevent C3 entry. Make sure all unused rings
4617          * are totally idle.
4618          */
4619         init_unused_rings(dev_priv);
4620
4621         BUG_ON(!dev_priv->kernel_context);
4622         ret = i915_terminally_wedged(dev_priv);
4623         if (ret)
4624                 goto out;
4625
4626         ret = i915_ppgtt_init_hw(dev_priv);
4627         if (ret) {
4628                 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
4629                 goto out;
4630         }
4631
4632         ret = intel_wopcm_init_hw(&dev_priv->wopcm);
4633         if (ret) {
4634                 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
4635                 goto out;
4636         }
4637
4638         /* We can't enable contexts until all firmware is loaded */
4639         ret = intel_uc_init_hw(dev_priv);
4640         if (ret) {
4641                 DRM_ERROR("Enabling uc failed (%d)\n", ret);
4642                 goto out;
4643         }
4644
4645         intel_mocs_init_l3cc_table(dev_priv);
4646
4647         /* Only when the HW is re-initialised can we replay the requests */
4648         ret = __i915_gem_restart_engines(dev_priv);
4649         if (ret)
4650                 goto cleanup_uc;
4651
4652         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4653
4654         return 0;
4655
4656 cleanup_uc:
4657         intel_uc_fini_hw(dev_priv);
4658 out:
4659         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4660
4661         return ret;
4662 }
4663
4664 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
4665 {
4666         struct i915_gem_context *ctx;
4667         struct intel_engine_cs *engine;
4668         enum intel_engine_id id;
4669         int err = 0;
4670
4671         /*
4672          * As we reset the GPU during very early sanitisation, the current
4673          * register state on the GPU should reflect its default values.
4674          * We load a context onto the hw (with restore-inhibit), then switch
4675          * over to a second context to save that default register state. We
4676          * can then prime every new context with that state so they all start
4677          * from the same default HW values.
4678          */
4679
4680         ctx = i915_gem_context_create_kernel(i915, 0);
4681         if (IS_ERR(ctx))
4682                 return PTR_ERR(ctx);
4683
4684         for_each_engine(engine, i915, id) {
4685                 struct i915_request *rq;
4686
4687                 rq = i915_request_alloc(engine, ctx);
4688                 if (IS_ERR(rq)) {
4689                         err = PTR_ERR(rq);
4690                         goto out_ctx;
4691                 }
4692
4693                 err = 0;
4694                 if (engine->init_context)
4695                         err = engine->init_context(rq);
4696
4697                 i915_request_add(rq);
4698                 if (err)
4699                         goto err_active;
4700         }
4701
4702         /* Flush the default context image to memory, and enable powersaving. */
4703         if (!load_power_context(i915)) {
4704                 err = -EIO;
4705                 goto err_active;
4706         }
4707
4708         for_each_engine(engine, i915, id) {
4709                 struct intel_context *ce;
4710                 struct i915_vma *state;
4711                 void *vaddr;
4712
4713                 ce = intel_context_lookup(ctx, engine);
4714                 if (!ce)
4715                         continue;
4716
4717                 state = ce->state;
4718                 if (!state)
4719                         continue;
4720
4721                 GEM_BUG_ON(intel_context_is_pinned(ce));
4722
4723                 /*
4724                  * As we will hold a reference to the logical state, it will
4725                  * not be torn down with the context, and importantly the
4726                  * object will hold onto its vma (making it possible for a
4727                  * stray GTT write to corrupt our defaults). Unmap the vma
4728                  * from the GTT to prevent such accidents and reclaim the
4729                  * space.
4730                  */
4731                 err = i915_vma_unbind(state);
4732                 if (err)
4733                         goto err_active;
4734
4735                 err = i915_gem_object_set_to_cpu_domain(state->obj, false);
4736                 if (err)
4737                         goto err_active;
4738
4739                 engine->default_state = i915_gem_object_get(state->obj);
4740                 i915_gem_object_set_cache_coherency(engine->default_state,
4741                                                     I915_CACHE_LLC);
4742
4743                 /* Check we can acquire the image of the context state */
4744                 vaddr = i915_gem_object_pin_map(engine->default_state,
4745                                                 I915_MAP_FORCE_WB);
4746                 if (IS_ERR(vaddr)) {
4747                         err = PTR_ERR(vaddr);
4748                         goto err_active;
4749                 }
4750
4751                 i915_gem_object_unpin_map(engine->default_state);
4752         }
4753
4754         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
4755                 unsigned int found = intel_engines_has_context_isolation(i915);
4756
4757                 /*
4758                  * Make sure that classes with multiple engine instances all
4759                  * share the same basic configuration.
4760                  */
4761                 for_each_engine(engine, i915, id) {
4762                         unsigned int bit = BIT(engine->uabi_class);
4763                         unsigned int expected = engine->default_state ? bit : 0;
4764
4765                         if ((found & bit) != expected) {
4766                                 DRM_ERROR("mismatching default context state for class %d on engine %s\n",
4767                                           engine->uabi_class, engine->name);
4768                         }
4769                 }
4770         }
4771
4772 out_ctx:
4773         i915_gem_context_set_closed(ctx);
4774         i915_gem_context_put(ctx);
4775         return err;
4776
4777 err_active:
4778         /*
4779          * If we have to abandon now, we expect the engines to be idle
4780          * and ready to be torn-down. The quickest way we can accomplish
4781          * this is by declaring ourselves wedged.
4782          */
4783         i915_gem_set_wedged(i915);
4784         goto out_ctx;
4785 }
4786
4787 static int
4788 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
4789 {
4790         struct drm_i915_gem_object *obj;
4791         struct i915_vma *vma;
4792         int ret;
4793
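             /*
              * Prefer stolen memory for the scratch page, falling back to an
              * ordinary internal object. Note that the stolen allocator reports
              * failure with NULL, whereas the internal allocator returns an
              * ERR_PTR, hence the two-step check below.
              */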
4794         obj = i915_gem_object_create_stolen(i915, size);
4795         if (!obj)
4796                 obj = i915_gem_object_create_internal(i915, size);
4797         if (IS_ERR(obj)) {
4798                 DRM_ERROR("Failed to allocate scratch page\n");
4799                 return PTR_ERR(obj);
4800         }
4801
4802         vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
4803         if (IS_ERR(vma)) {
4804                 ret = PTR_ERR(vma);
4805                 goto err_unref;
4806         }
4807
4808         ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
4809         if (ret)
4810                 goto err_unref;
4811
4812         i915->gt.scratch = vma;
4813         return 0;
4814
4815 err_unref:
4816         i915_gem_object_put(obj);
4817         return ret;
4818 }
4819
4820 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
4821 {
4822         i915_vma_unpin_and_release(&i915->gt.scratch, 0);
4823 }
4824
4825 int i915_gem_init(struct drm_i915_private *dev_priv)
4826 {
4827         int ret;
4828
4829         /* We need to fall back to 4K pages if the host doesn't support huge gtt. */
4830         if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
4831                 mkwrite_device_info(dev_priv)->page_sizes =
4832                         I915_GTT_PAGE_SIZE_4K;
4833
4834         dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
4835
4836         if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
4837                 dev_priv->gt.resume = intel_lr_context_resume;
4838                 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4839         } else {
4840                 dev_priv->gt.resume = intel_legacy_submission_resume;
4841                 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4842         }
4843
4844         i915_timelines_init(dev_priv);
4845
4846         ret = i915_gem_init_userptr(dev_priv);
4847         if (ret)
4848                 return ret;
4849
4850         ret = intel_uc_init_misc(dev_priv);
4851         if (ret)
4852                 return ret;
4853
4854         ret = intel_wopcm_init(&dev_priv->wopcm);
4855         if (ret)
4856                 goto err_uc_misc;
4857
4858         /* This is just a security blanket to placate dragons.
4859          * On some systems, we very sporadically observe that the first TLBs
4860          * used by the CS may be stale, despite us poking the TLB reset. If
4861          * we hold the forcewake during initialisation these problems
4862          * just magically go away.
4863          */
4864         mutex_lock(&dev_priv->drm.struct_mutex);
4865         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4866
4867         ret = i915_gem_init_ggtt(dev_priv);
4868         if (ret) {
4869                 GEM_BUG_ON(ret == -EIO);
4870                 goto err_unlock;
4871         }
4872
4873         ret = i915_gem_init_scratch(dev_priv,
4874                                     IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
4875         if (ret) {
4876                 GEM_BUG_ON(ret == -EIO);
4877                 goto err_ggtt;
4878         }
4879
4880         ret = i915_gem_contexts_init(dev_priv);
4881         if (ret) {
4882                 GEM_BUG_ON(ret == -EIO);
4883                 goto err_scratch;
4884         }
4885
4886         ret = intel_engines_init(dev_priv);
4887         if (ret) {
4888                 GEM_BUG_ON(ret == -EIO);
4889                 goto err_context;
4890         }
4891
4892         intel_init_gt_powersave(dev_priv);
4893
4894         ret = intel_uc_init(dev_priv);
4895         if (ret)
4896                 goto err_pm;
4897
4898         ret = i915_gem_init_hw(dev_priv);
4899         if (ret)
4900                 goto err_uc_init;
4901
4902         /*
4903          * Despite its name, intel_init_clock_gating applies display clock
4904          * gating workarounds, GT mmio workarounds and the occasional GT power
4905          * context workaround. Worse, sometimes it includes a context
4906          * register workaround which we need to apply before we record the
4907          * default HW state for all contexts.
4908          *
4909          * FIXME: break up the workarounds and apply them at the right time!
4910          */
4911         intel_init_clock_gating(dev_priv);
4912
4913         ret = __intel_engines_record_defaults(dev_priv);
4914         if (ret)
4915                 goto err_init_hw;
4916
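             /*
              * Fault-injection points: returning two different errors here lets
              * testing exercise both unwind flavours, the full teardown for a
              * non-EIO failure and the wedged-but-KMS-alive path for -EIO (see
              * the unwind comment further down).
              */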
4917         if (i915_inject_load_failure()) {
4918                 ret = -ENODEV;
4919                 goto err_init_hw;
4920         }
4921
4922         if (i915_inject_load_failure()) {
4923                 ret = -EIO;
4924                 goto err_init_hw;
4925         }
4926
4927         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4928         mutex_unlock(&dev_priv->drm.struct_mutex);
4929
4930         return 0;
4931
4932         /*
4933          * Unwinding is complicated by the fact that we want to handle -EIO to
4934          * mean disable GPU submission but keep KMS alive. We want to mark the
4935          * HW as irrevocably wedged, but keep enough state around that the
4936          * driver doesn't explode during runtime.
4937          */
4938 err_init_hw:
4939         mutex_unlock(&dev_priv->drm.struct_mutex);
4940
4941         i915_gem_suspend(dev_priv);
4942         i915_gem_suspend_late(dev_priv);
4943
4944         i915_gem_drain_workqueue(dev_priv);
4945
4946         mutex_lock(&dev_priv->drm.struct_mutex);
4947         intel_uc_fini_hw(dev_priv);
4948 err_uc_init:
4949         intel_uc_fini(dev_priv);
4950 err_pm:
4951         if (ret != -EIO) {
4952                 intel_cleanup_gt_powersave(dev_priv);
4953                 i915_gem_cleanup_engines(dev_priv);
4954         }
4955 err_context:
4956         if (ret != -EIO)
4957                 i915_gem_contexts_fini(dev_priv);
4958 err_scratch:
4959         i915_gem_fini_scratch(dev_priv);
4960 err_ggtt:
4961 err_unlock:
4962         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4963         mutex_unlock(&dev_priv->drm.struct_mutex);
4964
4965 err_uc_misc:
4966         intel_uc_fini_misc(dev_priv);
4967
4968         if (ret != -EIO) {
4969                 i915_gem_cleanup_userptr(dev_priv);
4970                 i915_timelines_fini(dev_priv);
4971         }
4972
4973         if (ret == -EIO) {
4974                 mutex_lock(&dev_priv->drm.struct_mutex);
4975
4976                 /*
4977                  * Allow engine initialisation to fail by marking the GPU as
4978                  * wedged. But we only want to do this when the GPU is angry;
4979                  * for all other failures, such as an allocation failure, bail.
4980                  */
4981                 if (!i915_reset_failed(dev_priv)) {
4982                         i915_load_error(dev_priv,
4983                                         "Failed to initialize GPU, declaring it wedged!\n");
4984                         i915_gem_set_wedged(dev_priv);
4985                 }
4986
4987                 /* Minimal basic recovery for KMS */
4988                 ret = i915_ggtt_enable_hw(dev_priv);
4989                 i915_gem_restore_gtt_mappings(dev_priv);
4990                 i915_gem_restore_fences(dev_priv);
4991                 intel_init_clock_gating(dev_priv);
4992
4993                 mutex_unlock(&dev_priv->drm.struct_mutex);
4994         }
4995
4996         i915_gem_drain_freed_objects(dev_priv);
4997         return ret;
4998 }
4999
5000 void i915_gem_fini(struct drm_i915_private *dev_priv)
5001 {
5002         i915_gem_suspend_late(dev_priv);
5003         intel_disable_gt_powersave(dev_priv);
5004
5005         /* Flush any outstanding unpin_work. */
5006         i915_gem_drain_workqueue(dev_priv);
5007
5008         mutex_lock(&dev_priv->drm.struct_mutex);
5009         intel_uc_fini_hw(dev_priv);
5010         intel_uc_fini(dev_priv);
5011         i915_gem_cleanup_engines(dev_priv);
5012         i915_gem_contexts_fini(dev_priv);
5013         i915_gem_fini_scratch(dev_priv);
5014         mutex_unlock(&dev_priv->drm.struct_mutex);
5015
5016         intel_wa_list_free(&dev_priv->gt_wa_list);
5017
5018         intel_cleanup_gt_powersave(dev_priv);
5019
5020         intel_uc_fini_misc(dev_priv);
5021         i915_gem_cleanup_userptr(dev_priv);
5022         i915_timelines_fini(dev_priv);
5023
5024         i915_gem_drain_freed_objects(dev_priv);
5025
5026         WARN_ON(!list_empty(&dev_priv->contexts.list));
5027 }
5028
5029 void i915_gem_init_mmio(struct drm_i915_private *i915)
5030 {
5031         i915_gem_sanitize(i915);
5032 }
5033
5034 void
5035 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
5036 {
5037         struct intel_engine_cs *engine;
5038         enum intel_engine_id id;
5039
5040         for_each_engine(engine, dev_priv, id)
5041                 dev_priv->gt.cleanup_engine(engine);
5042 }
5043
5044 void
5045 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
5046 {
5047         int i;
5048
5049         if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
5050             !IS_CHERRYVIEW(dev_priv))
5051                 dev_priv->num_fence_regs = 32;
5052         else if (INTEL_GEN(dev_priv) >= 4 ||
5053                  IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
5054                  IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
5055                 dev_priv->num_fence_regs = 16;
5056         else
5057                 dev_priv->num_fence_regs = 8;
5058
5059         if (intel_vgpu_active(dev_priv))
5060                 dev_priv->num_fence_regs =
5061                                 I915_READ(vgtif_reg(avail_rs.fence_num));
5062
5063         /* Initialize fence registers to zero */
5064         for (i = 0; i < dev_priv->num_fence_regs; i++) {
5065                 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
5066
5067                 fence->i915 = dev_priv;
5068                 fence->id = i;
5069                 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
5070         }
5071         i915_gem_restore_fences(dev_priv);
5072
5073         i915_gem_detect_bit_6_swizzle(dev_priv);
5074 }
5075
5076 static void i915_gem_init__mm(struct drm_i915_private *i915)
5077 {
5078         spin_lock_init(&i915->mm.object_stat_lock);
5079         spin_lock_init(&i915->mm.obj_lock);
5080         spin_lock_init(&i915->mm.free_lock);
5081
5082         init_llist_head(&i915->mm.free_list);
5083
5084         INIT_LIST_HEAD(&i915->mm.unbound_list);
5085         INIT_LIST_HEAD(&i915->mm.bound_list);
5086         INIT_LIST_HEAD(&i915->mm.fence_list);
5087         INIT_LIST_HEAD(&i915->mm.userfault_list);
5088
5089         INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
5090 }
5091
5092 int i915_gem_init_early(struct drm_i915_private *dev_priv)
5093 {
5094         int err;
5095
5096         INIT_LIST_HEAD(&dev_priv->gt.active_rings);
5097         INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
5098
5099         i915_gem_init__mm(dev_priv);
5100
5101         INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
5102                           i915_gem_retire_work_handler);
5103         INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
5104                           i915_gem_idle_work_handler);
5105         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
5106         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5107         mutex_init(&dev_priv->gpu_error.wedge_mutex);
5108         init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
5109
5110         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
5111
5112         spin_lock_init(&dev_priv->fb_tracking.lock);
5113
5114         err = i915_gemfs_init(dev_priv);
5115         if (err)
5116                 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);
5117
5118         return 0;
5119 }
5120
5121 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
5122 {
5123         i915_gem_drain_freed_objects(dev_priv);
5124         GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
5125         GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
5126         WARN_ON(dev_priv->mm.object_count);
5127
5128         cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
5129
5130         i915_gemfs_fini(dev_priv);
5131 }
5132
5133 int i915_gem_freeze(struct drm_i915_private *dev_priv)
5134 {
5135         /* Discard all purgeable objects, let userspace recover those as
5136          * required after resuming.
5137          */
5138         i915_gem_shrink_all(dev_priv);
5139
5140         return 0;
5141 }
5142
5143 int i915_gem_freeze_late(struct drm_i915_private *i915)
5144 {
5145         struct drm_i915_gem_object *obj;
5146         struct list_head *phases[] = {
5147                 &i915->mm.unbound_list,
5148                 &i915->mm.bound_list,
5149                 NULL
5150         }, **phase;
5151
5152         /*
5153          * Called just before we write the hibernation image.
5154          *
5155          * We need to update the domain tracking to reflect that the CPU
5156          * will be accessing all the pages to create and restore from the
5157          * hibernation, and so upon restoration those pages will be in the
5158          * CPU domain.
5159          *
5160          * To make sure the hibernation image contains the latest state,
5161          * we update that state just before writing out the image.
5162          *
5163          * To try to reduce the hibernation image, we manually shrink
5164          * the objects as well; see i915_gem_freeze().
5165          */
5166
5167         i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
5168         i915_gem_drain_freed_objects(i915);
5169
5170         mutex_lock(&i915->drm.struct_mutex);
5171         for (phase = phases; *phase; phase++) {
5172                 list_for_each_entry(obj, *phase, mm.link)
5173                         WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
5174         }
5175         mutex_unlock(&i915->drm.struct_mutex);
5176
5177         return 0;
5178 }
5179
5180 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5181 {
5182         struct drm_i915_file_private *file_priv = file->driver_priv;
5183         struct i915_request *request;
5184
5185         /* Clean up our request list when the client is going away, so that
5186          * later retire_requests won't dereference our soon-to-be-gone
5187          * file_priv.
5188          */
5189         spin_lock(&file_priv->mm.lock);
5190         list_for_each_entry(request, &file_priv->mm.request_list, client_link)
5191                 request->file_priv = NULL;
5192         spin_unlock(&file_priv->mm.lock);
5193 }
5194
5195 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
5196 {
5197         struct drm_i915_file_private *file_priv;
5198         int ret;
5199
5200         DRM_DEBUG("\n");
5201
5202         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5203         if (!file_priv)
5204                 return -ENOMEM;
5205
5206         file->driver_priv = file_priv;
5207         file_priv->dev_priv = i915;
5208         file_priv->file = file;
5209
5210         spin_lock_init(&file_priv->mm.lock);
5211         INIT_LIST_HEAD(&file_priv->mm.request_list);
5212
5213         file_priv->bsd_engine = -1;
5214         file_priv->hang_timestamp = jiffies;
5215
5216         ret = i915_gem_context_open(i915, file);
5217         if (ret)
5218                 kfree(file_priv);
5219
5220         return ret;
5221 }
5222
5223 /**
5224  * i915_gem_track_fb - update frontbuffer tracking
5225  * @old: current GEM buffer for the frontbuffer slots
5226  * @new: new GEM buffer for the frontbuffer slots
5227  * @frontbuffer_bits: bitmask of frontbuffer slots
5228  *
5229  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5230  * from @old and setting them in @new. Both @old and @new can be NULL.
5231  */
5232 void i915_gem_track_fb(struct drm_i915_gem_object *old,
5233                        struct drm_i915_gem_object *new,
5234                        unsigned frontbuffer_bits)
5235 {
5236         /* Control of individual bits within the mask is guarded by
5237          * the owning plane->mutex, i.e. we can never see concurrent
5238          * manipulation of individual bits. But since the bitfield as a whole
5239          * is updated using RMW, we need to use atomics in order to update
5240          * the bits.
5241          */
5242         BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
5243                      BITS_PER_TYPE(atomic_t));
5244
5245         if (old) {
5246                 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
5247                 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
5248         }
5249
5250         if (new) {
5251                 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
5252                 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
5253         }
5254 }
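
     /*
      * A short usage sketch with a hypothetical caller: when a plane flips
      * from an old framebuffer to a new one, that plane's frontbuffer bits
      * (plane_frontbuffer_bits below) simply migrate from the old backing
      * object to the new one; either object may be NULL while the plane is
      * being enabled or disabled.
      *
      *      i915_gem_track_fb(old_obj, new_obj, plane_frontbuffer_bits);
      */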
5255
5256 /* Allocate a new GEM object and fill it with the supplied data */
5257 struct drm_i915_gem_object *
5258 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
5259                                  const void *data, size_t size)
5260 {
5261         struct drm_i915_gem_object *obj;
5262         struct file *file;
5263         size_t offset;
5264         int err;
5265
5266         obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
5267         if (IS_ERR(obj))
5268                 return obj;
5269
5270         GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
5271
5272         file = obj->base.filp;
5273         offset = 0;
5274         do {
5275                 unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
5276                 struct page *page;
5277                 void *pgdata, *vaddr;
5278
5279                 err = pagecache_write_begin(file, file->f_mapping,
5280                                             offset, len, 0,
5281                                             &page, &pgdata);
5282                 if (err < 0)
5283                         goto fail;
5284
5285                 vaddr = kmap(page);
5286                 memcpy(vaddr, data, len);
5287                 kunmap(page);
5288
5289                 err = pagecache_write_end(file, file->f_mapping,
5290                                           offset, len, len,
5291                                           page, pgdata);
5292                 if (err < 0)
5293                         goto fail;
5294
5295                 size -= len;
5296                 data += len;
5297                 offset += len;
5298         } while (size);
5299
5300         return obj;
5301
5302 fail:
5303         i915_gem_object_put(obj);
5304         return ERR_PTR(err);
5305 }
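
     /*
      * A minimal usage sketch with a hypothetical firmware blob: wrap a
      * driver-owned buffer in a shmem-backed GEM object so that it can later
      * be pinned and read by the GPU.
      *
      *      obj = i915_gem_object_create_from_data(i915, blob, blob_size);
      *      if (IS_ERR(obj))
      *              return PTR_ERR(obj);
      */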
5306
5307 struct scatterlist *
5308 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
5309                        unsigned int n,
5310                        unsigned int *offset)
5311 {
5312         struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
5313         struct scatterlist *sg;
5314         unsigned int idx, count;
5315
5316         might_sleep();
5317         GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
5318         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
5319
5320         /* As we iterate forward through the sg, we record each entry in a
5321          * radixtree for quick repeated (backwards) lookups. If we have seen
5322          * this index previously, we will have an entry for it.
5323          *
5324          * Initial lookup is O(N), but this is amortized to O(1) for
5325          * sequential page access (where each new request is consecutive
5326          * to the previous one). Repeated lookups are O(lg(obj->base.size)),
5327          * i.e. O(1) with a large constant!
5328          */
5329         if (n < READ_ONCE(iter->sg_idx))
5330                 goto lookup;
5331
5332         mutex_lock(&iter->lock);
5333
5334         /* We prefer to reuse the last sg so that repeated lookups of this
5335          * (or the subsequent) sg are fast - comparing against the last
5336          * sg is faster than going through the radixtree.
5337          */
5338
5339         sg = iter->sg_pos;
5340         idx = iter->sg_idx;
5341         count = __sg_page_count(sg);
5342
5343         while (idx + count <= n) {
5344                 void *entry;
5345                 unsigned long i;
5346                 int ret;
5347
5348                 /* If we cannot allocate and insert this entry, or the
5349                  * individual pages from this range, cancel updating the
5350                  * sg_idx so that on this lookup we are forced to linearly
5351                  * scan onwards, but on future lookups we will try the
5352                  * insertion again (in which case we need to be careful of
5353                  * the error return reporting that we have already inserted
5354                  * this index).
5355                  */
5356                 ret = radix_tree_insert(&iter->radix, idx, sg);
5357                 if (ret && ret != -EEXIST)
5358                         goto scan;
5359
5360                 entry = xa_mk_value(idx);
5361                 for (i = 1; i < count; i++) {
5362                         ret = radix_tree_insert(&iter->radix, idx + i, entry);
5363                         if (ret && ret != -EEXIST)
5364                                 goto scan;
5365                 }
5366
5367                 idx += count;
5368                 sg = ____sg_next(sg);
5369                 count = __sg_page_count(sg);
5370         }
5371
5372 scan:
5373         iter->sg_pos = sg;
5374         iter->sg_idx = idx;
5375
5376         mutex_unlock(&iter->lock);
5377
5378         if (unlikely(n < idx)) /* insertion completed by another thread */
5379                 goto lookup;
5380
5381         /* In case we failed to insert the entry into the radixtree, we need
5382          * to look beyond the current sg.
5383          */
5384         while (idx + count <= n) {
5385                 idx += count;
5386                 sg = ____sg_next(sg);
5387                 count = __sg_page_count(sg);
5388         }
5389
5390         *offset = n - idx;
5391         return sg;
5392
5393 lookup:
5394         rcu_read_lock();
5395
5396         sg = radix_tree_lookup(&iter->radix, n);
5397         GEM_BUG_ON(!sg);
5398
5399         /* If this index is in the middle of a multi-page sg entry,
5400          * the radix tree will contain a value entry that points
5401          * to the start of that range. We will return the pointer to
5402          * the base page and the offset of this page within the
5403          * sg entry's range.
5404          */
5405         *offset = 0;
5406         if (unlikely(xa_is_value(sg))) {
5407                 unsigned long base = xa_to_value(sg);
5408
5409                 sg = radix_tree_lookup(&iter->radix, base);
5410                 GEM_BUG_ON(!sg);
5411
5412                 *offset = n - base;
5413         }
5414
5415         rcu_read_unlock();
5416
5417         return sg;
5418 }
5419
5420 struct page *
5421 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5422 {
5423         struct scatterlist *sg;
5424         unsigned int offset;
5425
5426         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5427
5428         sg = i915_gem_object_get_sg(obj, n, &offset);
5429         return nth_page(sg_page(sg), offset);
5430 }
5431
5432 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5433 struct page *
5434 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5435                                unsigned int n)
5436 {
5437         struct page *page;
5438
5439         page = i915_gem_object_get_page(obj, n);
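             /*
              * If the whole object is already tracked as dirty, every page will
              * be marked dirty when the pages are released, so only flag this
              * page individually when that object-level flag is not set.
              */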
5440         if (!obj->mm.dirty)
5441                 set_page_dirty(page);
5442
5443         return page;
5444 }
5445
5446 dma_addr_t
5447 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5448                                 unsigned long n)
5449 {
5450         struct scatterlist *sg;
5451         unsigned int offset;
5452
5453         sg = i915_gem_object_get_sg(obj, n, &offset);
5454         return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5455 }
5456
5457 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
5458 {
5459         struct sg_table *pages;
5460         int err;
5461
5462         if (align > obj->base.size)
5463                 return -EINVAL;
5464
5465         if (obj->ops == &i915_gem_phys_ops)
5466                 return 0;
5467
5468         if (obj->ops != &i915_gem_object_ops)
5469                 return -EINVAL;
5470
5471         err = i915_gem_object_unbind(obj);
5472         if (err)
5473                 return err;
5474
5475         mutex_lock(&obj->mm.lock);
5476
5477         if (obj->mm.madv != I915_MADV_WILLNEED) {
5478                 err = -EFAULT;
5479                 goto err_unlock;
5480         }
5481
5482         if (obj->mm.quirked) {
5483                 err = -EFAULT;
5484                 goto err_unlock;
5485         }
5486
5487         if (obj->mm.mapping) {
5488                 err = -EBUSY;
5489                 goto err_unlock;
5490         }
5491
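             /*
              * Detach the shmem sg_table but keep it to hand: if switching to
              * the phys backing fails, the err_xfer path below reinstalls it so
              * the object is left unchanged.
              */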
5492         pages = __i915_gem_object_unset_pages(obj);
5493
5494         obj->ops = &i915_gem_phys_ops;
5495
5496         err = ____i915_gem_object_get_pages(obj);
5497         if (err)
5498                 goto err_xfer;
5499
5500         /* Perma-pin (until release) the physical set of pages */
5501         __i915_gem_object_pin_pages(obj);
5502
5503         if (!IS_ERR_OR_NULL(pages))
5504                 i915_gem_object_ops.put_pages(obj, pages);
5505         mutex_unlock(&obj->mm.lock);
5506         return 0;
5507
5508 err_xfer:
5509         obj->ops = &i915_gem_object_ops;
5510         if (!IS_ERR_OR_NULL(pages)) {
5511                 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
5512
5513                 __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
5514         }
5515 err_unlock:
5516         mutex_unlock(&obj->mm.lock);
5517         return err;
5518 }
5519
5520 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5521 #include "selftests/scatterlist.c"
5522 #include "selftests/mock_gem_device.c"
5523 #include "selftests/huge_gem_object.c"
5524 #include "selftests/huge_pages.c"
5525 #include "selftests/i915_gem_object.c"
5526 #include "selftests/i915_gem_coherency.c"
5527 #include "selftests/i915_gem.c"
5528 #endif