drm/i915/gt: Hold rpm wakeref before taking ggtt->vm.mutex
drivers/gpu/drm/i915/gt/intel_ggtt.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5
6 #include <linux/stop_machine.h>
7
8 #include <asm/set_memory.h>
9 #include <asm/smp.h>
10
11 #include "intel_gt.h"
12 #include "i915_drv.h"
13 #include "i915_scatterlist.h"
14 #include "i915_vgpu.h"
15
16 #include "intel_gtt.h"
17
18 static int
19 i915_get_ggtt_vma_pages(struct i915_vma *vma);
20
21 static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
22                                    unsigned long color,
23                                    u64 *start,
24                                    u64 *end)
25 {
26         if (i915_node_color_differs(node, color))
27                 *start += I915_GTT_PAGE_SIZE;
28
29         /*
30          * Also leave a space between the unallocated reserved node after the
31          * GTT and any objects within the GTT, i.e. we use the color adjustment
32          * to insert a guard page to prevent prefetches crossing over the
33          * GTT boundary.
34          */
35         node = list_next_entry(node, node_list);
36         if (node->color != color)
37                 *end -= I915_GTT_PAGE_SIZE;
38 }
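/*
 * Illustrative sketch (not part of the driver): how the colour adjustment
 * above plays out. Assuming I915_GTT_PAGE_SIZE == 4096 and a candidate
 * hole of [0x10000, 0x20000) whose neighbouring nodes both carry a
 * different colour:
 *
 *	start += 4096;	(becomes 0x11000)
 *	end   -= 4096;	(becomes 0x1f000)
 *
 * so the allocator only ever places the object in [0x11000, 0x1f000),
 * leaving a scratch-backed guard page on either side against prefetch.
 */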
39
40 static int ggtt_init_hw(struct i915_ggtt *ggtt)
41 {
42         struct drm_i915_private *i915 = ggtt->vm.i915;
43
44         i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
45
46         ggtt->vm.is_ggtt = true;
47
48         /* Only VLV supports read-only GGTT mappings */
49         ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
50
51         if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
52                 ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
53
54         if (ggtt->mappable_end) {
55                 if (!io_mapping_init_wc(&ggtt->iomap,
56                                         ggtt->gmadr.start,
57                                         ggtt->mappable_end)) {
58                         ggtt->vm.cleanup(&ggtt->vm);
59                         return -EIO;
60                 }
61
62                 ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
63                                               ggtt->mappable_end);
64         }
65
66         i915_ggtt_init_fences(ggtt);
67
68         return 0;
69 }
70
71 /**
72  * i915_ggtt_init_hw - Initialize GGTT hardware
73  * @i915: i915 device
74  */
75 int i915_ggtt_init_hw(struct drm_i915_private *i915)
76 {
77         int ret;
78
79         stash_init(&i915->mm.wc_stash);
80
81         /*
82          * Note that we use page colouring to enforce a guard page at the
83          * end of the address space. This is required as the CS may prefetch
84          * beyond the end of the batch buffer, across the page boundary,
85          * and beyond the end of the GTT if we do not provide a guard.
86          */
87         ret = ggtt_init_hw(&i915->ggtt);
88         if (ret)
89                 return ret;
90
91         return 0;
92 }
93
94 /*
95  * Certain Gen5 chipsets require idling the GPU before
96  * unmapping anything from the GTT when VT-d is enabled.
97  */
98 static bool needs_idle_maps(struct drm_i915_private *i915)
99 {
100         /*
101          * Query intel_iommu to see if we need the workaround. Presumably that
102          * was loaded first.
103          */
104         return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
105 }
106
107 static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
108 {
109         struct drm_i915_private *i915 = ggtt->vm.i915;
110
111         /*
112          * Don't bother messing with faults pre GEN6 as we have little
113          * documentation supporting that it's a good idea.
114          */
115         if (INTEL_GEN(i915) < 6)
116                 return;
117
118         intel_gt_check_and_clear_faults(ggtt->vm.gt);
119
120         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
121
122         ggtt->invalidate(ggtt);
123 }
124
125 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
126 {
127         ggtt_suspend_mappings(&i915->ggtt);
128 }
129
130 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
131 {
132         struct intel_uncore *uncore = ggtt->vm.gt->uncore;
133
134         spin_lock_irq(&uncore->lock);
135         intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
136         intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
137         spin_unlock_irq(&uncore->lock);
138 }
139
140 static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
141 {
142         struct intel_uncore *uncore = ggtt->vm.gt->uncore;
143
144         /*
145          * Note that as an uncached mmio write, this will flush the
146          * WCB of the writes into the GGTT before it triggers the invalidate.
147          */
148         intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
149 }
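/*
 * Illustrative sketch (not part of the driver): the pattern the comment
 * above relies on. PTE updates go through a WC mapping of the GSM, so
 * callers batch all of their writes first and then issue a single
 * uncached register write, which both drains the write-combining buffer
 * and kicks the TLB invalidation:
 *
 *	for (i = 0; i < n; i++)
 *		gen8_set_pte(&gtt_base[i], pte);  (WC writes, may linger)
 *	ggtt->invalidate(ggtt);                   (UC write: flush + invalidate)
 */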
150
151 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
152 {
153         struct intel_uncore *uncore = ggtt->vm.gt->uncore;
154         struct drm_i915_private *i915 = ggtt->vm.i915;
155
156         gen8_ggtt_invalidate(ggtt);
157
158         if (INTEL_GEN(i915) >= 12)
159                 intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
160                                       GEN12_GUC_TLB_INV_CR_INVALIDATE);
161         else
162                 intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
163 }
164
165 static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
166 {
167         intel_gtt_chipset_flush();
168 }
169
170 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
171 {
172         writeq(pte, addr);
173 }
174
175 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
176                                   dma_addr_t addr,
177                                   u64 offset,
178                                   enum i915_cache_level level,
179                                   u32 unused)
180 {
181         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
182         gen8_pte_t __iomem *pte =
183                 (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
184
185         gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
186
187         ggtt->invalidate(ggtt);
188 }
189
190 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
191                                      struct i915_vma *vma,
192                                      enum i915_cache_level level,
193                                      u32 flags)
194 {
195         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
196         struct sgt_iter sgt_iter;
197         gen8_pte_t __iomem *gtt_entries;
198         const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
199         dma_addr_t addr;
200
201         /*
202          * Note that we ignore PTE_READ_ONLY here. The caller must be careful
203          * not to allow the user to override access to a read only page.
204          */
205
206         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
207         gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
208         for_each_sgt_daddr(addr, sgt_iter, vma->pages)
209                 gen8_set_pte(gtt_entries++, pte_encode | addr);
210
211         /*
212          * We want to flush the TLBs only after we're certain all the PTE
213          * updates have finished.
214          */
215         ggtt->invalidate(ggtt);
216 }
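/*
 * Illustrative sketch (not part of the driver): because gen8_pte_encode()
 * is called with addr == 0 above, each entry is assembled as
 *
 *	pte = gen8_pte_encode(0, level, 0) | dma_addr;
 *
 * i.e. the valid/cache-control bits are computed once per call and only
 * the address bits vary from page to page.
 */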
217
218 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
219                                   dma_addr_t addr,
220                                   u64 offset,
221                                   enum i915_cache_level level,
222                                   u32 flags)
223 {
224         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
225         gen6_pte_t __iomem *pte =
226                 (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
227
228         iowrite32(vm->pte_encode(addr, level, flags), pte);
229
230         ggtt->invalidate(ggtt);
231 }
232
233 /*
234  * Binds an object into the global gtt with the specified cache level.
235  * The object will be accessible to the GPU via commands whose operands
236  * reference offsets within the global GTT as well as accessible by the GPU
237  * through the GMADR mapped BAR (i915->mm.gtt->gtt).
238  */
239 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
240                                      struct i915_vma *vma,
241                                      enum i915_cache_level level,
242                                      u32 flags)
243 {
244         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
245         gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
246         unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
247         struct sgt_iter iter;
248         dma_addr_t addr;
249
250         for_each_sgt_daddr(addr, iter, vma->pages)
251                 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
252
253         /*
254          * We want to flush the TLBs only after we're certain all the PTE
255          * updates have finished.
256          */
257         ggtt->invalidate(ggtt);
258 }
259
260 static void nop_clear_range(struct i915_address_space *vm,
261                             u64 start, u64 length)
262 {
263 }
264
265 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
266                                   u64 start, u64 length)
267 {
268         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
269         unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
270         unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
271         const gen8_pte_t scratch_pte = vm->scratch[0].encode;
272         gen8_pte_t __iomem *gtt_base =
273                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
274         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
275         int i;
276
277         if (WARN(num_entries > max_entries,
278                  "First entry = %d; Num entries = %d (max=%d)\n",
279                  first_entry, num_entries, max_entries))
280                 num_entries = max_entries;
281
282         for (i = 0; i < num_entries; i++)
283                 gen8_set_pte(&gtt_base[i], scratch_pte);
284 }
285
286 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
287 {
288         /*
289          * Make sure the internal GAM fifo has been cleared of all GTT
290          * writes before exiting stop_machine(). This guarantees that
291          * any aperture accesses waiting to start in another process
292          * cannot back up behind the GTT writes causing a hang.
293          * The register can be any arbitrary GAM register.
294          */
295         intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
296 }
297
298 struct insert_page {
299         struct i915_address_space *vm;
300         dma_addr_t addr;
301         u64 offset;
302         enum i915_cache_level level;
303 };
304
305 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
306 {
307         struct insert_page *arg = _arg;
308
309         gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
310         bxt_vtd_ggtt_wa(arg->vm);
311
312         return 0;
313 }
314
315 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
316                                           dma_addr_t addr,
317                                           u64 offset,
318                                           enum i915_cache_level level,
319                                           u32 unused)
320 {
321         struct insert_page arg = { vm, addr, offset, level };
322
323         stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
324 }
325
326 struct insert_entries {
327         struct i915_address_space *vm;
328         struct i915_vma *vma;
329         enum i915_cache_level level;
330         u32 flags;
331 };
332
333 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
334 {
335         struct insert_entries *arg = _arg;
336
337         gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
338         bxt_vtd_ggtt_wa(arg->vm);
339
340         return 0;
341 }
342
343 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
344                                              struct i915_vma *vma,
345                                              enum i915_cache_level level,
346                                              u32 flags)
347 {
348         struct insert_entries arg = { vm, vma, level, flags };
349
350         stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
351 }
352
353 struct clear_range {
354         struct i915_address_space *vm;
355         u64 start;
356         u64 length;
357 };
358
359 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
360 {
361         struct clear_range *arg = _arg;
362
363         gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
364         bxt_vtd_ggtt_wa(arg->vm);
365
366         return 0;
367 }
368
369 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
370                                           u64 start,
371                                           u64 length)
372 {
373         struct clear_range arg = { vm, start, length };
374
375         stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
376 }
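/*
 * Illustrative sketch (not part of the driver): the three __BKL wrappers
 * above all follow the same shape - pack the arguments into a struct, run
 * the real update under stop_machine() so no other CPU can race an
 * aperture access against the PTE writes, and finish with the posting
 * read in bxt_vtd_ggtt_wa() to drain the GAM fifo before the machine is
 * released:
 *
 *	struct insert_page arg = { vm, addr, offset, level };
 *	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
 */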
377
378 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
379                                   u64 start, u64 length)
380 {
381         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
382         unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
383         unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
384         gen6_pte_t scratch_pte, __iomem *gtt_base =
385                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
386         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
387         int i;
388
389         if (WARN(num_entries > max_entries,
390                  "First entry = %d; Num entries = %d (max=%d)\n",
391                  first_entry, num_entries, max_entries))
392                 num_entries = max_entries;
393
394         scratch_pte = vm->scratch[0].encode;
395         for (i = 0; i < num_entries; i++)
396                 iowrite32(scratch_pte, &gtt_base[i]);
397 }
398
399 static void i915_ggtt_insert_page(struct i915_address_space *vm,
400                                   dma_addr_t addr,
401                                   u64 offset,
402                                   enum i915_cache_level cache_level,
403                                   u32 unused)
404 {
405         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
406                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
407
408         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
409 }
410
411 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
412                                      struct i915_vma *vma,
413                                      enum i915_cache_level cache_level,
414                                      u32 unused)
415 {
416         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
417                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
418
419         intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
420                                     flags);
421 }
422
423 static void i915_ggtt_clear_range(struct i915_address_space *vm,
424                                   u64 start, u64 length)
425 {
426         intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
427 }
428
429 static int ggtt_bind_vma(struct i915_vma *vma,
430                          enum i915_cache_level cache_level,
431                          u32 flags)
432 {
433         struct drm_i915_gem_object *obj = vma->obj;
434         u32 pte_flags;
435
436         /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
437         pte_flags = 0;
438         if (i915_gem_object_is_readonly(obj))
439                 pte_flags |= PTE_READ_ONLY;
440
441         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
442
443         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
444
445         /*
446          * Without aliasing PPGTT there's no difference between
447          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
448          * upgrade to both bound if we bind either to avoid double-binding.
449          */
450         atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
451
452         return 0;
453 }
454
455 static void ggtt_unbind_vma(struct i915_vma *vma)
456 {
457         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
458 }
459
460 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
461 {
462         u64 size;
463         int ret;
464
465         if (!USES_GUC(ggtt->vm.i915))
466                 return 0;
467
468         GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
469         size = ggtt->vm.total - GUC_GGTT_TOP;
470
471         ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
472                                    GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
473                                    PIN_NOEVICT);
474         if (ret)
475                 DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
476
477         return ret;
478 }
479
480 static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
481 {
482         if (drm_mm_node_allocated(&ggtt->uc_fw))
483                 drm_mm_remove_node(&ggtt->uc_fw);
484 }
485
486 static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
487 {
488         ggtt_release_guc_top(ggtt);
489         if (drm_mm_node_allocated(&ggtt->error_capture))
490                 drm_mm_remove_node(&ggtt->error_capture);
491         mutex_destroy(&ggtt->error_mutex);
492 }
493
494 static int init_ggtt(struct i915_ggtt *ggtt)
495 {
496         /*
497          * Let GEM Manage all of the aperture.
498          *
499          * However, leave one page at the end still bound to the scratch page.
500          * There are a number of places where the hardware apparently prefetches
501          * past the end of the object, and we've seen multiple hangs with the
502          * GPU head pointer stuck in a batchbuffer bound at the last page of the
503          * aperture.  One page should be enough to keep any prefetching inside
504          * of the aperture.
505          */
506         unsigned long hole_start, hole_end;
507         struct drm_mm_node *entry;
508         int ret;
509
510         /*
511          * GuC requires all resources that we're sharing with it to be placed in
512          * non-WOPCM memory. If GuC is not present or not in use we still need a
513          * small bias as ring wraparound at offset 0 sometimes hangs. No idea
514          * why.
515          */
516         ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
517                                intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
518
519         ret = intel_vgt_balloon(ggtt);
520         if (ret)
521                 return ret;
522
523         mutex_init(&ggtt->error_mutex);
524         if (ggtt->mappable_end) {
525                 /* Reserve a mappable slot for our lockless error capture */
526                 ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
527                                                   &ggtt->error_capture,
528                                                   PAGE_SIZE, 0,
529                                                   I915_COLOR_UNEVICTABLE,
530                                                   0, ggtt->mappable_end,
531                                                   DRM_MM_INSERT_LOW);
532                 if (ret)
533                         return ret;
534         }
535
536         /*
537          * The upper portion of the GuC address space has a sizeable hole
538          * (several MB) that is inaccessible by GuC. Reserve this range within
539          * GGTT as it can comfortably hold GuC/HuC firmware images.
540          */
541         ret = ggtt_reserve_guc_top(ggtt);
542         if (ret)
543                 goto err;
544
545         /* Clear any non-preallocated blocks */
546         drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
547                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
548                               hole_start, hole_end);
549                 ggtt->vm.clear_range(&ggtt->vm, hole_start,
550                                      hole_end - hole_start);
551         }
552
553         /* And finally clear the reserved guard page */
554         ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
555
556         return 0;
557
558 err:
559         cleanup_init_ggtt(ggtt);
560         return ret;
561 }
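/*
 * Illustrative sketch (not part of the driver): the pin_bias computed in
 * init_ggtt() is simply the larger of one GTT page and the GuC WOPCM
 * size. Assuming, say, a 1 MiB WOPCM region:
 *
 *	pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, 1 << 20);	(== 1 MiB)
 *
 * so objects shared with the GuC land at offsets >= 1 MiB, and even
 * without GuC nothing is ever placed at offset 0 (the ring-wraparound
 * quirk mentioned above).
 */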
562
563 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
564                                  enum i915_cache_level cache_level,
565                                  u32 flags)
566 {
567         u32 pte_flags;
568         int ret;
569
570         /* Currently applicable only to VLV */
571         pte_flags = 0;
572         if (i915_gem_object_is_readonly(vma->obj))
573                 pte_flags |= PTE_READ_ONLY;
574
575         if (flags & I915_VMA_LOCAL_BIND) {
576                 struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
577
578                 if (flags & I915_VMA_ALLOC) {
579                         ret = alias->vm.allocate_va_range(&alias->vm,
580                                                           vma->node.start,
581                                                           vma->size);
582                         if (ret)
583                                 return ret;
584
585                         set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
586                 }
587
588                 GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
589                                      __i915_vma_flags(vma)));
590                 alias->vm.insert_entries(&alias->vm, vma,
591                                          cache_level, pte_flags);
592         }
593
594         if (flags & I915_VMA_GLOBAL_BIND)
595                 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
596
597         return 0;
598 }
599
600 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
601 {
602         if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
603                 struct i915_address_space *vm = vma->vm;
604
605                 vm->clear_range(vm, vma->node.start, vma->size);
606         }
607
608         if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
609                 struct i915_address_space *vm =
610                         &i915_vm_to_ggtt(vma->vm)->alias->vm;
611
612                 vm->clear_range(vm, vma->node.start, vma->size);
613         }
614 }
615
616 static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
617 {
618         struct i915_ppgtt *ppgtt;
619         int err;
620
621         ppgtt = i915_ppgtt_create(ggtt->vm.gt);
622         if (IS_ERR(ppgtt))
623                 return PTR_ERR(ppgtt);
624
625         if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
626                 err = -ENODEV;
627                 goto err_ppgtt;
628         }
629
630         /*
631          * Note we only pre-allocate as far as the end of the global
632          * GTT. On 48b / 4-level page-tables, the difference is very,
633          * very significant! We have to preallocate as GVT/vgpu does
634          * not like the page directory disappearing.
635          */
636         err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
637         if (err)
638                 goto err_ppgtt;
639
640         ggtt->alias = ppgtt;
641         ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
642
643         GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
644         ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
645
646         GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
647         ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
648
649         return 0;
650
651 err_ppgtt:
652         i915_vm_put(&ppgtt->vm);
653         return err;
654 }
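/*
 * Illustrative sketch (not part of the driver): with the aliasing PPGTT
 * installed, a single GGTT vma may be written into two sets of page
 * tables. aliasing_gtt_bind_vma() above dispatches on the bind flags:
 *
 *	I915_VMA_GLOBAL_BIND -> ggtt->vm.insert_entries()         (GGTT PTEs)
 *	I915_VMA_LOCAL_BIND  -> ggtt->alias->vm.insert_entries()  (PPGTT PTEs)
 *
 * Both mappings use the same GGTT offset, which is why the full GGTT
 * range is pre-allocated in the PPGTT here.
 */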
655
656 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
657 {
658         struct i915_ppgtt *ppgtt;
659
660         ppgtt = fetch_and_zero(&ggtt->alias);
661         if (!ppgtt)
662                 return;
663
664         i915_vm_put(&ppgtt->vm);
665
666         ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
667         ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
668 }
669
670 int i915_init_ggtt(struct drm_i915_private *i915)
671 {
672         int ret;
673
674         ret = init_ggtt(&i915->ggtt);
675         if (ret)
676                 return ret;
677
678         if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
679                 ret = init_aliasing_ppgtt(&i915->ggtt);
680                 if (ret)
681                         cleanup_init_ggtt(&i915->ggtt);
682         }
683
684         return 0;
685 }
686
687 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
688 {
689         struct i915_vma *vma, *vn;
690
691         atomic_set(&ggtt->vm.open, 0);
692
693         rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
694         flush_workqueue(ggtt->vm.i915->wq);
695
696         mutex_lock(&ggtt->vm.mutex);
697
698         list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
699                 WARN_ON(__i915_vma_unbind(vma));
700
701         if (drm_mm_node_allocated(&ggtt->error_capture))
702                 drm_mm_remove_node(&ggtt->error_capture);
703         mutex_destroy(&ggtt->error_mutex);
704
705         ggtt_release_guc_top(ggtt);
706         intel_vgt_deballoon(ggtt);
707
708         ggtt->vm.cleanup(&ggtt->vm);
709
710         mutex_unlock(&ggtt->vm.mutex);
711         i915_address_space_fini(&ggtt->vm);
712
713         arch_phys_wc_del(ggtt->mtrr);
714
715         if (ggtt->iomap.size)
716                 io_mapping_fini(&ggtt->iomap);
717 }
718
719 /**
720  * i915_ggtt_driver_release - Clean up GGTT hardware initialization
721  * @i915: i915 device
722  */
723 void i915_ggtt_driver_release(struct drm_i915_private *i915)
724 {
725         struct pagevec *pvec;
726
727         fini_aliasing_ppgtt(&i915->ggtt);
728
729         ggtt_cleanup_hw(&i915->ggtt);
730
731         pvec = &i915->mm.wc_stash.pvec;
732         if (pvec->nr) {
733                 set_pages_array_wb(pvec->pages, pvec->nr);
734                 __pagevec_release(pvec);
735         }
736 }
737
738 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
739 {
740         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
741         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
742         return snb_gmch_ctl << 20;
743 }
744
745 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
746 {
747         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
748         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
749         if (bdw_gmch_ctl)
750                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
751
752 #ifdef CONFIG_X86_32
753         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
754         if (bdw_gmch_ctl > 4)
755                 bdw_gmch_ctl = 4;
756 #endif
757
758         return bdw_gmch_ctl << 20;
759 }
760
761 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
762 {
763         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
764         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
765
766         if (gmch_ctrl)
767                 return 1 << (20 + gmch_ctrl);
768
769         return 0;
770 }
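/*
 * Illustrative sketch (not part of the driver): worked examples of the
 * three GMCH decode helpers above, after the GGMS field has been
 * shifted/masked out of the GMCH control word:
 *
 *	gen6: the field is a size in MiB, e.g. 2 -> 2 MiB of 4-byte PTEs,
 *	      i.e. a 2 GiB GGTT.
 *	gen8: the field is an exponent, e.g. 2 -> (1 << 2) << 20 == 4 MiB
 *	      of 8-byte PTEs, i.e. a 2 GiB GGTT (clamped to 4 MiB of PTEs
 *	      on 32-bit kernels).
 *	chv:  likewise an exponent of MiB, e.g. 2 -> 1 << (20 + 2) == 4 MiB.
 */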
771
772 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
773 {
774         struct drm_i915_private *i915 = ggtt->vm.i915;
775         struct pci_dev *pdev = i915->drm.pdev;
776         phys_addr_t phys_addr;
777         int ret;
778
779         /* For Modern GENs the PTEs and register space are split in the BAR */
780         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
781
782         /*
783          * On BXT+/CNL+, writes larger than 64 bits to the GTT pagetable range
784          * will be dropped. For WC mappings in general we have 64-byte burst
785          * writes when the WC buffer is flushed, so we can't use it, but have to
786          * resort to an uncached mapping. The WC issue is easily caught by the
787          * readback check when writing GTT PTE entries.
788          */
789         if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
790                 ggtt->gsm = ioremap_nocache(phys_addr, size);
791         else
792                 ggtt->gsm = ioremap_wc(phys_addr, size);
793         if (!ggtt->gsm) {
794                 DRM_ERROR("Failed to map the ggtt page table\n");
795                 return -ENOMEM;
796         }
797
798         ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
799         if (ret) {
800                 DRM_ERROR("Scratch setup failed\n");
801                 /* iounmap will also get called at remove, but meh */
802                 iounmap(ggtt->gsm);
803                 return ret;
804         }
805
806         ggtt->vm.scratch[0].encode =
807                 ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
808                                     I915_CACHE_NONE, 0);
809
810         return 0;
811 }
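/*
 * Illustrative sketch (not part of the driver): on these parts BAR 0 is
 * split in half - registers in the first half, the GSM (the GGTT page
 * table itself) in the second - hence the computation above:
 *
 *	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
 *
 * e.g. a 16 MiB BAR at 0xf0000000 would put the GSM mapping at 0xf0800000.
 * The pre-encoded scratch PTE stored in vm.scratch[0].encode is what the
 * clear_range() implementations write over unused entries.
 */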
812
813 int ggtt_set_pages(struct i915_vma *vma)
814 {
815         int ret;
816
817         GEM_BUG_ON(vma->pages);
818
819         ret = i915_get_ggtt_vma_pages(vma);
820         if (ret)
821                 return ret;
822
823         vma->page_sizes = vma->obj->mm.page_sizes;
824
825         return 0;
826 }
827
828 static void gen6_gmch_remove(struct i915_address_space *vm)
829 {
830         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
831
832         iounmap(ggtt->gsm);
833         cleanup_scratch_page(vm);
834 }
835
836 static struct resource pci_resource(struct pci_dev *pdev, int bar)
837 {
838         return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
839                                                pci_resource_len(pdev, bar));
840 }
841
842 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
843 {
844         struct drm_i915_private *i915 = ggtt->vm.i915;
845         struct pci_dev *pdev = i915->drm.pdev;
846         unsigned int size;
847         u16 snb_gmch_ctl;
848         int err;
849
850         /* TODO: We're not aware of mappable constraints on gen8 yet */
851         if (!IS_DGFX(i915)) {
852                 ggtt->gmadr = pci_resource(pdev, 2);
853                 ggtt->mappable_end = resource_size(&ggtt->gmadr);
854         }
855
856         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
857         if (!err)
858                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
859         if (err)
860                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
861
862         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
863         if (IS_CHERRYVIEW(i915))
864                 size = chv_get_total_gtt_size(snb_gmch_ctl);
865         else
866                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
867
868         ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
869         ggtt->vm.cleanup = gen6_gmch_remove;
870         ggtt->vm.insert_page = gen8_ggtt_insert_page;
871         ggtt->vm.clear_range = nop_clear_range;
872         if (intel_scanout_needs_vtd_wa(i915))
873                 ggtt->vm.clear_range = gen8_ggtt_clear_range;
874
875         ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
876
877         /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
878         if (intel_ggtt_update_needs_vtd_wa(i915) ||
879             IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
880                 ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
881                 ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
882                 if (ggtt->vm.clear_range != nop_clear_range)
883                         ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
884         }
885
886         ggtt->invalidate = gen8_ggtt_invalidate;
887
888         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
889         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
890         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
891         ggtt->vm.vma_ops.clear_pages = clear_pages;
892
893         ggtt->vm.pte_encode = gen8_pte_encode;
894
895         setup_private_pat(ggtt->vm.gt->uncore);
896
897         return ggtt_probe_common(ggtt, size);
898 }
899
900 static u64 snb_pte_encode(dma_addr_t addr,
901                           enum i915_cache_level level,
902                           u32 flags)
903 {
904         gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
905
906         switch (level) {
907         case I915_CACHE_L3_LLC:
908         case I915_CACHE_LLC:
909                 pte |= GEN6_PTE_CACHE_LLC;
910                 break;
911         case I915_CACHE_NONE:
912                 pte |= GEN6_PTE_UNCACHED;
913                 break;
914         default:
915                 MISSING_CASE(level);
916         }
917
918         return pte;
919 }
920
921 static u64 ivb_pte_encode(dma_addr_t addr,
922                           enum i915_cache_level level,
923                           u32 flags)
924 {
925         gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
926
927         switch (level) {
928         case I915_CACHE_L3_LLC:
929                 pte |= GEN7_PTE_CACHE_L3_LLC;
930                 break;
931         case I915_CACHE_LLC:
932                 pte |= GEN6_PTE_CACHE_LLC;
933                 break;
934         case I915_CACHE_NONE:
935                 pte |= GEN6_PTE_UNCACHED;
936                 break;
937         default:
938                 MISSING_CASE(level);
939         }
940
941         return pte;
942 }
943
944 static u64 byt_pte_encode(dma_addr_t addr,
945                           enum i915_cache_level level,
946                           u32 flags)
947 {
948         gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
949
950         if (!(flags & PTE_READ_ONLY))
951                 pte |= BYT_PTE_WRITEABLE;
952
953         if (level != I915_CACHE_NONE)
954                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
955
956         return pte;
957 }
958
959 static u64 hsw_pte_encode(dma_addr_t addr,
960                           enum i915_cache_level level,
961                           u32 flags)
962 {
963         gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
964
965         if (level != I915_CACHE_NONE)
966                 pte |= HSW_WB_LLC_AGE3;
967
968         return pte;
969 }
970
971 static u64 iris_pte_encode(dma_addr_t addr,
972                            enum i915_cache_level level,
973                            u32 flags)
974 {
975         gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
976
977         switch (level) {
978         case I915_CACHE_NONE:
979                 break;
980         case I915_CACHE_WT:
981                 pte |= HSW_WT_ELLC_LLC_AGE3;
982                 break;
983         default:
984                 pte |= HSW_WB_ELLC_LLC_AGE3;
985                 break;
986         }
987
988         return pte;
989 }
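/*
 * Illustrative sketch (not part of the driver): every gen6-class encoder
 * above builds a 32-bit PTE of the form
 *
 *	pte = <ADDR_ENCODE>(dma_addr) | GEN6_PTE_VALID | <cache/control bits>;
 *
 * only the cache/control bits (and HSW's address encoding) differ per
 * platform, which is why gen6_gmch_probe() below simply selects one of
 * them as vm.pte_encode.
 */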
990
991 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
992 {
993         struct drm_i915_private *i915 = ggtt->vm.i915;
994         struct pci_dev *pdev = i915->drm.pdev;
995         unsigned int size;
996         u16 snb_gmch_ctl;
997         int err;
998
999         ggtt->gmadr = pci_resource(pdev, 2);
1000         ggtt->mappable_end = resource_size(&ggtt->gmadr);
1001
1002         /*
1003          * 64/512MB is the current min/max we actually know of, but this is
1004          * just a coarse sanity check.
1005          */
1006         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
1007                 DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
1008                 return -ENXIO;
1009         }
1010
1011         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
1012         if (!err)
1013                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
1014         if (err)
1015                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
1016         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1017
1018         size = gen6_get_total_gtt_size(snb_gmch_ctl);
1019         ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
1020
1021         ggtt->vm.clear_range = nop_clear_range;
1022         if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
1023                 ggtt->vm.clear_range = gen6_ggtt_clear_range;
1024         ggtt->vm.insert_page = gen6_ggtt_insert_page;
1025         ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
1026         ggtt->vm.cleanup = gen6_gmch_remove;
1027
1028         ggtt->invalidate = gen6_ggtt_invalidate;
1029
1030         if (HAS_EDRAM(i915))
1031                 ggtt->vm.pte_encode = iris_pte_encode;
1032         else if (IS_HASWELL(i915))
1033                 ggtt->vm.pte_encode = hsw_pte_encode;
1034         else if (IS_VALLEYVIEW(i915))
1035                 ggtt->vm.pte_encode = byt_pte_encode;
1036         else if (INTEL_GEN(i915) >= 7)
1037                 ggtt->vm.pte_encode = ivb_pte_encode;
1038         else
1039                 ggtt->vm.pte_encode = snb_pte_encode;
1040
1041         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
1042         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
1043         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
1044         ggtt->vm.vma_ops.clear_pages = clear_pages;
1045
1046         return ggtt_probe_common(ggtt, size);
1047 }
1048
1049 static void i915_gmch_remove(struct i915_address_space *vm)
1050 {
1051         intel_gmch_remove();
1052 }
1053
1054 static int i915_gmch_probe(struct i915_ggtt *ggtt)
1055 {
1056         struct drm_i915_private *i915 = ggtt->vm.i915;
1057         phys_addr_t gmadr_base;
1058         int ret;
1059
1060         ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
1061         if (!ret) {
1062                 DRM_ERROR("failed to set up gmch\n");
1063                 return -EIO;
1064         }
1065
1066         intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
1067
1068         ggtt->gmadr =
1069                 (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
1070
1071         ggtt->do_idle_maps = needs_idle_maps(i915);
1072         ggtt->vm.insert_page = i915_ggtt_insert_page;
1073         ggtt->vm.insert_entries = i915_ggtt_insert_entries;
1074         ggtt->vm.clear_range = i915_ggtt_clear_range;
1075         ggtt->vm.cleanup = i915_gmch_remove;
1076
1077         ggtt->invalidate = gmch_ggtt_invalidate;
1078
1079         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
1080         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
1081         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
1082         ggtt->vm.vma_ops.clear_pages = clear_pages;
1083
1084         if (unlikely(ggtt->do_idle_maps))
1085                 dev_notice(i915->drm.dev,
1086                            "Applying Ironlake quirks for intel_iommu\n");
1087
1088         return 0;
1089 }
1090
1091 static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
1092 {
1093         struct drm_i915_private *i915 = gt->i915;
1094         int ret;
1095
1096         ggtt->vm.gt = gt;
1097         ggtt->vm.i915 = i915;
1098         ggtt->vm.dma = &i915->drm.pdev->dev;
1099
1100         if (INTEL_GEN(i915) <= 5)
1101                 ret = i915_gmch_probe(ggtt);
1102         else if (INTEL_GEN(i915) < 8)
1103                 ret = gen6_gmch_probe(ggtt);
1104         else
1105                 ret = gen8_gmch_probe(ggtt);
1106         if (ret)
1107                 return ret;
1108
1109         if ((ggtt->vm.total - 1) >> 32) {
1110                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
1111                           " of address space! Found %lldM!\n",
1112                           ggtt->vm.total >> 20);
1113                 ggtt->vm.total = 1ULL << 32;
1114                 ggtt->mappable_end =
1115                         min_t(u64, ggtt->mappable_end, ggtt->vm.total);
1116         }
1117
1118         if (ggtt->mappable_end > ggtt->vm.total) {
1119                 DRM_ERROR("mappable aperture extends past end of GGTT,"
1120                           " aperture=%pa, total=%llx\n",
1121                           &ggtt->mappable_end, ggtt->vm.total);
1122                 ggtt->mappable_end = ggtt->vm.total;
1123         }
1124
1125         /* GMADR is the PCI mmio aperture into the global GTT. */
1126         DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
1127         DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
1128         DRM_DEBUG_DRIVER("DSM size = %lluM\n",
1129                          (u64)resource_size(&intel_graphics_stolen_res) >> 20);
1130
1131         return 0;
1132 }
1133
1134 /**
1135  * i915_ggtt_probe_hw - Probe GGTT hardware location
1136  * @i915: i915 device
1137  */
1138 int i915_ggtt_probe_hw(struct drm_i915_private *i915)
1139 {
1140         int ret;
1141
1142         ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
1143         if (ret)
1144                 return ret;
1145
1146         if (intel_vtd_active())
1147                 dev_info(i915->drm.dev, "VT-d active for gfx access\n");
1148
1149         return 0;
1150 }
1151
1152 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
1153 {
1154         if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
1155                 return -EIO;
1156
1157         return 0;
1158 }
1159
1160 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
1161 {
1162         GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
1163
1164         ggtt->invalidate = guc_ggtt_invalidate;
1165
1166         ggtt->invalidate(ggtt);
1167 }
1168
1169 void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
1170 {
1171         /* XXX Temporary pardon for error unload */
1172         if (ggtt->invalidate == gen8_ggtt_invalidate)
1173                 return;
1174
1175         /* We should only be called after i915_ggtt_enable_guc() */
1176         GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
1177
1178         ggtt->invalidate = gen8_ggtt_invalidate;
1179
1180         ggtt->invalidate(ggtt);
1181 }
1182
1183 static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
1184 {
1185         struct i915_vma *vma;
1186         bool flush = false;
1187         int open;
1188
1189         intel_gt_check_and_clear_faults(ggtt->vm.gt);
1190
1191         mutex_lock(&ggtt->vm.mutex);
1192
1193         /* First fill our portion of the GTT with scratch pages */
1194         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
1195
1196         /* Skip rewriting PTE on VMA unbind. */
1197         open = atomic_xchg(&ggtt->vm.open, 0);
1198
1199         /* clflush objects bound into the GGTT and rebind them. */
1200         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
1201                 struct drm_i915_gem_object *obj = vma->obj;
1202
1203                 if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
1204                         continue;
1205
1206                 clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
1207                 WARN_ON(i915_vma_bind(vma,
1208                                       obj ? obj->cache_level : 0,
1209                                       PIN_GLOBAL, NULL));
1210                 if (obj) { /* only used during resume => exclusive access */
1211                         flush |= fetch_and_zero(&obj->write_domain);
1212                         obj->read_domains |= I915_GEM_DOMAIN_GTT;
1213                 }
1214         }
1215
1216         atomic_set(&ggtt->vm.open, open);
1217         ggtt->invalidate(ggtt);
1218
1219         mutex_unlock(&ggtt->vm.mutex);
1220
1221         if (flush)
1222                 wbinvd_on_all_cpus();
1223 }
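/*
 * Illustrative sketch (not part of the driver): the resume flow above is,
 * in outline,
 *
 *	clear the whole GGTT to scratch;
 *	for each vma on the bound_list that was GLOBAL_BIND:
 *		clear its bound bit and i915_vma_bind(vma, ..., PIN_GLOBAL, NULL);
 *	ggtt->invalidate(ggtt);
 *
 * followed by wbinvd_on_all_cpus() only if a rebound object still had a
 * dirty CPU write domain.
 */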
1224
1225 void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
1226 {
1227         struct i915_ggtt *ggtt = &i915->ggtt;
1228
1229         ggtt_restore_mappings(ggtt);
1230
1231         if (INTEL_GEN(i915) >= 8)
1232                 setup_private_pat(ggtt->vm.gt->uncore);
1233 }
1234
1235 static struct scatterlist *
1236 rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
1237              unsigned int width, unsigned int height,
1238              unsigned int stride,
1239              struct sg_table *st, struct scatterlist *sg)
1240 {
1241         unsigned int column, row;
1242         unsigned int src_idx;
1243
1244         for (column = 0; column < width; column++) {
1245                 src_idx = stride * (height - 1) + column + offset;
1246                 for (row = 0; row < height; row++) {
1247                         st->nents++;
1248                         /*
1249                          * We don't need the pages, but need to initialize
1250                          * the entries so the sg list can be happily traversed.
1251                          * All we need are the DMA addresses.
1252                          */
1253                         sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
1254                         sg_dma_address(sg) =
1255                                 i915_gem_object_get_dma_address(obj, src_idx);
1256                         sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
1257                         sg = sg_next(sg);
1258                         src_idx -= stride;
1259                 }
1260         }
1261
1262         return sg;
1263 }
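/*
 * Illustrative sketch (not part of the driver): for a plane with
 * width == 2, height == 3, stride == 2 and offset == 0, the column-major
 * walk above emits the source page indices
 *
 *	column 0: 4, 2, 0	(src_idx starts at stride * (height - 1) + column)
 *	column 1: 5, 3, 1
 *
 * i.e. the rotated view reads the object bottom-up, one column at a time.
 */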
1264
1265 static noinline struct sg_table *
1266 intel_rotate_pages(struct intel_rotation_info *rot_info,
1267                    struct drm_i915_gem_object *obj)
1268 {
1269         unsigned int size = intel_rotation_info_size(rot_info);
1270         struct sg_table *st;
1271         struct scatterlist *sg;
1272         int ret = -ENOMEM;
1273         int i;
1274
1275         /* Allocate target SG list. */
1276         st = kmalloc(sizeof(*st), GFP_KERNEL);
1277         if (!st)
1278                 goto err_st_alloc;
1279
1280         ret = sg_alloc_table(st, size, GFP_KERNEL);
1281         if (ret)
1282                 goto err_sg_alloc;
1283
1284         st->nents = 0;
1285         sg = st->sgl;
1286
1287         for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
1288                 sg = rotate_pages(obj, rot_info->plane[i].offset,
1289                                   rot_info->plane[i].width, rot_info->plane[i].height,
1290                                   rot_info->plane[i].stride, st, sg);
1291         }
1292
1293         return st;
1294
1295 err_sg_alloc:
1296         kfree(st);
1297 err_st_alloc:
1298
1299         DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
1300                          obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
1301
1302         return ERR_PTR(ret);
1303 }
1304
1305 static struct scatterlist *
1306 remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
1307             unsigned int width, unsigned int height,
1308             unsigned int stride,
1309             struct sg_table *st, struct scatterlist *sg)
1310 {
1311         unsigned int row;
1312
1313         for (row = 0; row < height; row++) {
1314                 unsigned int left = width * I915_GTT_PAGE_SIZE;
1315
1316                 while (left) {
1317                         dma_addr_t addr;
1318                         unsigned int length;
1319
1320                         /*
1321                          * We don't need the pages, but need to initialize
1322                          * the entries so the sg list can be happily traversed.
1323                          * All we need are the DMA addresses.
1324                          */
1325
1326                         addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
1327
1328                         length = min(left, length);
1329
1330                         st->nents++;
1331
1332                         sg_set_page(sg, NULL, length, 0);
1333                         sg_dma_address(sg) = addr;
1334                         sg_dma_len(sg) = length;
1335                         sg = sg_next(sg);
1336
1337                         offset += length / I915_GTT_PAGE_SIZE;
1338                         left -= length;
1339                 }
1340
1341                 offset += stride - width;
1342         }
1343
1344         return sg;
1345 }
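/*
 * Illustrative sketch (not part of the driver): unlike rotate_pages(),
 * remap_pages() walks the view row by row and lets
 * i915_gem_object_get_dma_address_len() report how many contiguous pages
 * follow, so physically contiguous runs collapse into a single sg entry,
 * e.g. a 4-page-wide row backed by one chunk becomes one entry with
 * sg_dma_len == 4 * I915_GTT_PAGE_SIZE. The "offset += stride - width"
 * at the end of each row skips the part of the source stride that is not
 * visible in the remapped view.
 */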
1346
1347 static noinline struct sg_table *
1348 intel_remap_pages(struct intel_remapped_info *rem_info,
1349                   struct drm_i915_gem_object *obj)
1350 {
1351         unsigned int size = intel_remapped_info_size(rem_info);
1352         struct sg_table *st;
1353         struct scatterlist *sg;
1354         int ret = -ENOMEM;
1355         int i;
1356
1357         /* Allocate target SG list. */
1358         st = kmalloc(sizeof(*st), GFP_KERNEL);
1359         if (!st)
1360                 goto err_st_alloc;
1361
1362         ret = sg_alloc_table(st, size, GFP_KERNEL);
1363         if (ret)
1364                 goto err_sg_alloc;
1365
1366         st->nents = 0;
1367         sg = st->sgl;
1368
1369         for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
1370                 sg = remap_pages(obj, rem_info->plane[i].offset,
1371                                  rem_info->plane[i].width, rem_info->plane[i].height,
1372                                  rem_info->plane[i].stride, st, sg);
1373         }
1374
1375         i915_sg_trim(st);
1376
1377         return st;
1378
1379 err_sg_alloc:
1380         kfree(st);
1381 err_st_alloc:
1382
1383         DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
1384                          obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
1385
1386         return ERR_PTR(ret);
1387 }
1388
1389 static noinline struct sg_table *
1390 intel_partial_pages(const struct i915_ggtt_view *view,
1391                     struct drm_i915_gem_object *obj)
1392 {
1393         struct sg_table *st;
1394         struct scatterlist *sg, *iter;
1395         unsigned int count = view->partial.size;
1396         unsigned int offset;
1397         int ret = -ENOMEM;
1398
1399         st = kmalloc(sizeof(*st), GFP_KERNEL);
1400         if (!st)
1401                 goto err_st_alloc;
1402
1403         ret = sg_alloc_table(st, count, GFP_KERNEL);
1404         if (ret)
1405                 goto err_sg_alloc;
1406
1407         iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
1408         GEM_BUG_ON(!iter);
1409
1410         sg = st->sgl;
1411         st->nents = 0;
1412         do {
1413                 unsigned int len;
1414
1415                 len = min(iter->length - (offset << PAGE_SHIFT),
1416                           count << PAGE_SHIFT);
1417                 sg_set_page(sg, NULL, len, 0);
1418                 sg_dma_address(sg) =
1419                         sg_dma_address(iter) + (offset << PAGE_SHIFT);
1420                 sg_dma_len(sg) = len;
1421
1422                 st->nents++;
1423                 count -= len >> PAGE_SHIFT;
1424                 if (count == 0) {
1425                         sg_mark_end(sg);
1426                         i915_sg_trim(st); /* Drop any unused tail entries. */
1427
1428                         return st;
1429                 }
1430
1431                 sg = __sg_next(sg);
1432                 iter = __sg_next(iter);
1433                 offset = 0;
1434         } while (1);
1435
1436 err_sg_alloc:
1437         kfree(st);
1438 err_st_alloc:
1439         return ERR_PTR(ret);
1440 }
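/*
 * Illustrative sketch (not part of the driver): a partial view is just a
 * sub-range of the object, described by view->partial.offset/size in
 * pages. For, say, offset == 2 and size == 3, the loop above copies the
 * DMA addresses of pages [2, 5) into a fresh sg_table, splitting or
 * trimming the source entries as needed, and marks the end of the list as
 * soon as the three requested pages are covered.
 */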
1441
1442 static int
1443 i915_get_ggtt_vma_pages(struct i915_vma *vma)
1444 {
1445         int ret;
1446
1447         /*
1448          * The vma->pages are only valid within the lifespan of the borrowed
1449          * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
1450          * must be the vma->pages. A simple rule is that vma->pages must only
1451          * be accessed when the obj->mm.pages are pinned.
1452          */
1453         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
1454
1455         switch (vma->ggtt_view.type) {
1456         default:
1457                 GEM_BUG_ON(vma->ggtt_view.type);
1458                 /* fall through */
1459         case I915_GGTT_VIEW_NORMAL:
1460                 vma->pages = vma->obj->mm.pages;
1461                 return 0;
1462
1463         case I915_GGTT_VIEW_ROTATED:
1464                 vma->pages =
1465                         intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
1466                 break;
1467
1468         case I915_GGTT_VIEW_REMAPPED:
1469                 vma->pages =
1470                         intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
1471                 break;
1472
1473         case I915_GGTT_VIEW_PARTIAL:
1474                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
1475                 break;
1476         }
1477
1478         ret = 0;
1479         if (IS_ERR(vma->pages)) {
1480                 ret = PTR_ERR(vma->pages);
1481                 vma->pages = NULL;
1482                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
1483                           vma->ggtt_view.type, ret);
1484         }
1485         return ret;
1486 }