drm/i915: add support for 64K scratch page
[linux-2.6-microblaze.git] drivers/gpu/drm/i915/i915_gem_gtt.c
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/slab.h> /* fault-inject.h is not standalone! */
27
28 #include <linux/fault-inject.h>
29 #include <linux/log2.h>
30 #include <linux/random.h>
31 #include <linux/seq_file.h>
32 #include <linux/stop_machine.h>
33
34 #include <asm/set_memory.h>
35
36 #include <drm/drmP.h>
37 #include <drm/i915_drm.h>
38
39 #include "i915_drv.h"
40 #include "i915_vgpu.h"
41 #include "i915_trace.h"
42 #include "intel_drv.h"
43 #include "intel_frontbuffer.h"
44
45 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
46
47 /**
48  * DOC: Global GTT views
49  *
50  * Background and previous state
51  *
52  * Historically objects could exist (be bound) in global GTT space only as
53  * singular instances with a view representing all of the object's backing pages
54  * in a linear fashion. This view will be called a normal view.
55  *
56  * To support multiple views of the same object, where the number of mapped
57  * pages is not equal to the backing store, or where the layout of the pages
58  * is not linear, the concept of a GGTT view was added.
59  *
60  * One example of an alternative view is a stereo display driven by a single
61  * image. In this case we would have a framebuffer looking like this
62  * (2x2 pages):
63  *
64  *    12
65  *    34
66  *
67  * The above would represent a normal GGTT view as normally mapped for GPU or CPU
68  * rendering. In contrast, fed to the display engine would be an alternative
69  * view which could look something like this:
70  *
71  *   1212
72  *   3434
73  *
74  * In this example both the size and layout of pages in the alternative view are
75  * different from the normal view.
76  *
77  * Implementation and usage
78  *
79  * GGTT views are implemented using VMAs and are distinguished via enum
80  * i915_ggtt_view_type and struct i915_ggtt_view.
81  *
82  * A new flavour of core GEM functions which work with GGTT bound objects was
83  * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
84  * renaming across large amounts of code. They take a struct i915_ggtt_view
85  * parameter encapsulating all metadata required to implement a view.
86  *
87  * As a helper for callers which are only interested in the normal view,
88  * a globally const i915_ggtt_view_normal singleton instance exists. All old
89  * core GEM API functions, the ones not taking the view parameter, operate on
90  * the normal GGTT view.
91  *
92  * Code wanting to add or use a new GGTT view needs to:
93  *
94  * 1. Add a new enum with a suitable name.
95  * 2. Extend the metadata in the i915_ggtt_view structure if required.
96  * 3. Add support to i915_get_ggtt_vma_pages().
97  *
98  * New views are required to build a scatter-gather table from within the
99  * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
100  * and exists for the lifetime of a VMA.
101  *
102  * Core API is designed to have copy semantics which means that passed in
103  * struct i915_ggtt_view does not need to be persistent (left around after
104  * calling the core API functions).
105  *
106  */
107
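/*
 * Illustrative sketch only (not part of the original file): a caller that
 * wants something other than the normal view passes a struct i915_ggtt_view
 * describing it when pinning the object into the GGTT.  The exact view field
 * layout and the pin helper's signature are assumptions taken from the wider
 * driver, not from this file, hence the #if 0.
 */
#if 0
static struct i915_vma *
example_pin_rotated_view(struct drm_i915_gem_object *obj,
			 const struct intel_rotation_info *rot)
{
	struct i915_ggtt_view view = {
		.type = I915_GGTT_VIEW_ROTATED,
		.rotated = *rot,	/* per-plane width/height/stride */
	};

	/* Copy semantics: 'view' does not need to outlive this call. */
	return i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_GLOBAL);
}
#endif
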
108 static int
109 i915_get_ggtt_vma_pages(struct i915_vma *vma);
110
111 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
112 {
113         /* Note that as an uncached mmio write, this should flush the
114          * WCB of the writes into the GGTT before it triggers the invalidate.
115          */
116         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
117 }
118
119 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
120 {
121         gen6_ggtt_invalidate(dev_priv);
122         I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
123 }
124
125 static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
126 {
127         intel_gtt_chipset_flush();
128 }
129
130 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
131 {
132         i915->ggtt.invalidate(i915);
133 }
134
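/*
 * Sanitize the enable_ppgtt module parameter against what the hardware and
 * any active virtualisation layer can actually do.  The return value uses
 * the same encoding as the parameter: 0 = PPGTT disabled, 1 = aliasing
 * PPGTT, 2 = full 32b PPGTT, 3 = full 48b PPGTT.
 */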
135 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
136                                 int enable_ppgtt)
137 {
138         bool has_aliasing_ppgtt;
139         bool has_full_ppgtt;
140         bool has_full_48bit_ppgtt;
141
142         has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt;
143         has_full_ppgtt = dev_priv->info.has_full_ppgtt;
144         has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
145
146         if (intel_vgpu_active(dev_priv)) {
147                 /* GVT-g has no support for 32bit ppgtt */
148                 has_full_ppgtt = false;
149                 has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
150         }
151
152         if (!has_aliasing_ppgtt)
153                 return 0;
154
155         /*
156          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
157          * execlists, the sole mechanism available to submit work.
158          */
159         if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
160                 return 0;
161
162         if (enable_ppgtt == 1)
163                 return 1;
164
165         if (enable_ppgtt == 2 && has_full_ppgtt)
166                 return 2;
167
168         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
169                 return 3;
170
171         /* Disable ppgtt on SNB if VT-d is on. */
172         if (IS_GEN6(dev_priv) && intel_vtd_active()) {
173                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
174                 return 0;
175         }
176
177         /* Early VLV doesn't have this */
178         if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
179                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
180                 return 0;
181         }
182
183         if (INTEL_GEN(dev_priv) >= 8 && i915_modparams.enable_execlists) {
184                 if (has_full_48bit_ppgtt)
185                         return 3;
186
187                 if (has_full_ppgtt)
188                         return 2;
189         }
190
191         return has_aliasing_ppgtt ? 1 : 0;
192 }
193
194 static int ppgtt_bind_vma(struct i915_vma *vma,
195                           enum i915_cache_level cache_level,
196                           u32 unused)
197 {
198         u32 pte_flags;
199         int ret;
200
201         if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
202                 ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
203                                                  vma->size);
204                 if (ret)
205                         return ret;
206         }
207
208         /* Currently applicable only to VLV */
209         pte_flags = 0;
210         if (vma->obj->gt_ro)
211                 pte_flags |= PTE_READ_ONLY;
212
213         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
214
215         return 0;
216 }
217
218 static void ppgtt_unbind_vma(struct i915_vma *vma)
219 {
220         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
221 }
222
223 static int ppgtt_set_pages(struct i915_vma *vma)
224 {
225         GEM_BUG_ON(vma->pages);
226
227         vma->pages = vma->obj->mm.pages;
228
229         vma->page_sizes = vma->obj->mm.page_sizes;
230
231         return 0;
232 }
233
234 static void clear_pages(struct i915_vma *vma)
235 {
236         GEM_BUG_ON(!vma->pages);
237
238         if (vma->pages != vma->obj->mm.pages) {
239                 sg_free_table(vma->pages);
240                 kfree(vma->pages);
241         }
242         vma->pages = NULL;
243
244         memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
245 }
246
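/*
 * Gen8+ PTEs carry the page address plus a PPAT selection for cacheability:
 * uncached for I915_CACHE_NONE, the display/eLLC (write-through) entry for
 * I915_CACHE_WT, and write-back for everything else.  Valid entries always
 * set PRESENT and RW.
 */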
247 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
248                                   enum i915_cache_level level)
249 {
250         gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
251         pte |= addr;
252
253         switch (level) {
254         case I915_CACHE_NONE:
255                 pte |= PPAT_UNCACHED;
256                 break;
257         case I915_CACHE_WT:
258                 pte |= PPAT_DISPLAY_ELLC;
259                 break;
260         default:
261                 pte |= PPAT_CACHED;
262                 break;
263         }
264
265         return pte;
266 }
267
268 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
269                                   const enum i915_cache_level level)
270 {
271         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
272         pde |= addr;
273         if (level != I915_CACHE_NONE)
274                 pde |= PPAT_CACHED_PDE;
275         else
276                 pde |= PPAT_UNCACHED;
277         return pde;
278 }
279
280 #define gen8_pdpe_encode gen8_pde_encode
281 #define gen8_pml4e_encode gen8_pde_encode
282
283 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
284                                  enum i915_cache_level level,
285                                  u32 unused)
286 {
287         gen6_pte_t pte = GEN6_PTE_VALID;
288         pte |= GEN6_PTE_ADDR_ENCODE(addr);
289
290         switch (level) {
291         case I915_CACHE_L3_LLC:
292         case I915_CACHE_LLC:
293                 pte |= GEN6_PTE_CACHE_LLC;
294                 break;
295         case I915_CACHE_NONE:
296                 pte |= GEN6_PTE_UNCACHED;
297                 break;
298         default:
299                 MISSING_CASE(level);
300         }
301
302         return pte;
303 }
304
305 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
306                                  enum i915_cache_level level,
307                                  u32 unused)
308 {
309         gen6_pte_t pte = GEN6_PTE_VALID;
310         pte |= GEN6_PTE_ADDR_ENCODE(addr);
311
312         switch (level) {
313         case I915_CACHE_L3_LLC:
314                 pte |= GEN7_PTE_CACHE_L3_LLC;
315                 break;
316         case I915_CACHE_LLC:
317                 pte |= GEN6_PTE_CACHE_LLC;
318                 break;
319         case I915_CACHE_NONE:
320                 pte |= GEN6_PTE_UNCACHED;
321                 break;
322         default:
323                 MISSING_CASE(level);
324         }
325
326         return pte;
327 }
328
329 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
330                                  enum i915_cache_level level,
331                                  u32 flags)
332 {
333         gen6_pte_t pte = GEN6_PTE_VALID;
334         pte |= GEN6_PTE_ADDR_ENCODE(addr);
335
336         if (!(flags & PTE_READ_ONLY))
337                 pte |= BYT_PTE_WRITEABLE;
338
339         if (level != I915_CACHE_NONE)
340                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
341
342         return pte;
343 }
344
345 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
346                                  enum i915_cache_level level,
347                                  u32 unused)
348 {
349         gen6_pte_t pte = GEN6_PTE_VALID;
350         pte |= HSW_PTE_ADDR_ENCODE(addr);
351
352         if (level != I915_CACHE_NONE)
353                 pte |= HSW_WB_LLC_AGE3;
354
355         return pte;
356 }
357
358 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
359                                   enum i915_cache_level level,
360                                   u32 unused)
361 {
362         gen6_pte_t pte = GEN6_PTE_VALID;
363         pte |= HSW_PTE_ADDR_ENCODE(addr);
364
365         switch (level) {
366         case I915_CACHE_NONE:
367                 break;
368         case I915_CACHE_WT:
369                 pte |= HSW_WT_ELLC_LLC_AGE3;
370                 break;
371         default:
372                 pte |= HSW_WB_ELLC_LLC_AGE3;
373                 break;
374         }
375
376         return pte;
377 }
378
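/*
 * Page-table pages come from a small per-VM cache (vm->free_pages) and,
 * when the tables must be mapped write-combined (pt_kmap_wc), from a global
 * WC stash, so that the expensive set_pages_array_wc() call is amortized
 * over a batch of pages rather than paid per allocation.
 */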
379 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
380 {
381         struct pagevec *pvec = &vm->free_pages;
382
383         if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
384                 i915_gem_shrink_all(vm->i915);
385
386         if (likely(pvec->nr))
387                 return pvec->pages[--pvec->nr];
388
389         if (!vm->pt_kmap_wc)
390                 return alloc_page(gfp);
391
392         /* A placeholder for a specific mutex to guard the WC stash */
393         lockdep_assert_held(&vm->i915->drm.struct_mutex);
394
395         /* Look in our global stash of WC pages... */
396         pvec = &vm->i915->mm.wc_stash;
397         if (likely(pvec->nr))
398                 return pvec->pages[--pvec->nr];
399
400         /* Otherwise batch allocate pages to amortize the cost of set_pages_wc. */
401         do {
402                 struct page *page;
403
404                 page = alloc_page(gfp);
405                 if (unlikely(!page))
406                         break;
407
408                 pvec->pages[pvec->nr++] = page;
409         } while (pagevec_space(pvec));
410
411         if (unlikely(!pvec->nr))
412                 return NULL;
413
414         set_pages_array_wc(pvec->pages, pvec->nr);
415
416         return pvec->pages[--pvec->nr];
417 }
418
419 static void vm_free_pages_release(struct i915_address_space *vm,
420                                   bool immediate)
421 {
422         struct pagevec *pvec = &vm->free_pages;
423
424         GEM_BUG_ON(!pagevec_count(pvec));
425
426         if (vm->pt_kmap_wc) {
427                 struct pagevec *stash = &vm->i915->mm.wc_stash;
428
429                 /* When we use WC, first fill up the global stash and then,
430                  * only if the stash is full, immediately free the overflow.
431                  */
432
433                 lockdep_assert_held(&vm->i915->drm.struct_mutex);
434                 if (pagevec_space(stash)) {
435                         do {
436                                 stash->pages[stash->nr++] =
437                                         pvec->pages[--pvec->nr];
438                                 if (!pvec->nr)
439                                         return;
440                         } while (pagevec_space(stash));
441
442                         /* As we have made some room in the VM's free_pages,
443                          * we can wait for it to fill again. Unless we are
444                          * inside i915_address_space_fini() and must
445                          * immediately release the pages!
446                          */
447                         if (!immediate)
448                                 return;
449                 }
450
451                 set_pages_array_wb(pvec->pages, pvec->nr);
452         }
453
454         __pagevec_release(pvec);
455 }
456
457 static void vm_free_page(struct i915_address_space *vm, struct page *page)
458 {
459         if (!pagevec_add(&vm->free_pages, page))
460                 vm_free_pages_release(vm, false);
461 }
462
463 static int __setup_page_dma(struct i915_address_space *vm,
464                             struct i915_page_dma *p,
465                             gfp_t gfp)
466 {
467         p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
468         if (unlikely(!p->page))
469                 return -ENOMEM;
470
471         p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
472                                 PCI_DMA_BIDIRECTIONAL);
473         if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
474                 vm_free_page(vm, p->page);
475                 return -ENOMEM;
476         }
477
478         return 0;
479 }
480
481 static int setup_page_dma(struct i915_address_space *vm,
482                           struct i915_page_dma *p)
483 {
484         return __setup_page_dma(vm, p, I915_GFP_DMA);
485 }
486
487 static void cleanup_page_dma(struct i915_address_space *vm,
488                              struct i915_page_dma *p)
489 {
490         dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
491         vm_free_page(vm, p->page);
492 }
493
494 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
495
496 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
497 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
498 #define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
499 #define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
500
501 static void fill_page_dma(struct i915_address_space *vm,
502                           struct i915_page_dma *p,
503                           const u64 val)
504 {
505         u64 * const vaddr = kmap_atomic(p->page);
506
507         memset64(vaddr, val, PAGE_SIZE / sizeof(val));
508
509         kunmap_atomic(vaddr);
510 }
511
512 static void fill_page_dma_32(struct i915_address_space *vm,
513                              struct i915_page_dma *p,
514                              const u32 v)
515 {
516         fill_page_dma(vm, p, (u64)v << 32 | v);
517 }
518
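/*
 * Scratch page setup.  A page table normally holds 512 PTEs of 4K each,
 * covering 2M.  When the hardware treats a page table as containing 64K
 * entries it only consumes every 16th PTE (64K / 4K == 16), and each of
 * those PTEs must point at a 64K-aligned 64K region -- hence the optional
 * 64K scratch allocation below for 48b VMs that support 64K GTT pages.
 */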
519 static int
520 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
521 {
522         struct page *page = NULL;
523         dma_addr_t addr;
524         int order;
525
526         /*
527          * In order to utilize 64K pages for an object with a size < 2M, we will
528          * need to support a 64K scratch page, given that every 16th entry for a
529          * page-table operating in 64K mode must point to a properly aligned 64K
530          * region, including any PTEs which happen to point to scratch.
531          *
532          * This is only relevant for the 48b PPGTT where we support
533          * huge-gtt-pages, see also i915_vma_insert().
534          *
535          * TODO: we should really consider write-protecting the scratch page and
536          * sharing it between ppgtts.
537          */
538         if (i915_vm_is_48bit(vm) &&
539             HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
540                 order = get_order(I915_GTT_PAGE_SIZE_64K);
541                 page = alloc_pages(gfp | __GFP_ZERO, order);
542                 if (page) {
543                         addr = dma_map_page(vm->dma, page, 0,
544                                             I915_GTT_PAGE_SIZE_64K,
545                                             PCI_DMA_BIDIRECTIONAL);
546                         if (unlikely(dma_mapping_error(vm->dma, addr))) {
547                                 __free_pages(page, order);
548                                 page = NULL;
549                         }
550
551                         if (page && !IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) {
552                                 dma_unmap_page(vm->dma, addr,
553                                                I915_GTT_PAGE_SIZE_64K,
554                                                PCI_DMA_BIDIRECTIONAL);
555                                 __free_pages(page, order);
556                                 page = NULL;
557                         }
558                 }
559         }
560
561         if (!page) {
562                 order = 0;
563                 page = alloc_page(gfp | __GFP_ZERO);
564                 if (unlikely(!page))
565                         return -ENOMEM;
566
567                 addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
568                                     PCI_DMA_BIDIRECTIONAL);
569                 if (unlikely(dma_mapping_error(vm->dma, addr))) {
570                         __free_page(page);
571                         return -ENOMEM;
572                 }
573         }
574
575         vm->scratch_page.page = page;
576         vm->scratch_page.daddr = addr;
577         vm->scratch_page.order = order;
578
579         return 0;
580 }
581
582 static void cleanup_scratch_page(struct i915_address_space *vm)
583 {
584         struct i915_page_dma *p = &vm->scratch_page;
585
586         dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
587                        PCI_DMA_BIDIRECTIONAL);
588         __free_pages(p->page, p->order);
589 }
590
591 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
592 {
593         struct i915_page_table *pt;
594
595         pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
596         if (unlikely(!pt))
597                 return ERR_PTR(-ENOMEM);
598
599         if (unlikely(setup_px(vm, pt))) {
600                 kfree(pt);
601                 return ERR_PTR(-ENOMEM);
602         }
603
604         pt->used_ptes = 0;
605         return pt;
606 }
607
608 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
609 {
610         cleanup_px(vm, pt);
611         kfree(pt);
612 }
613
614 static void gen8_initialize_pt(struct i915_address_space *vm,
615                                struct i915_page_table *pt)
616 {
617         fill_px(vm, pt,
618                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
619 }
620
621 static void gen6_initialize_pt(struct i915_address_space *vm,
622                                struct i915_page_table *pt)
623 {
624         fill32_px(vm, pt,
625                   vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
626 }
627
628 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
629 {
630         struct i915_page_directory *pd;
631
632         pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
633         if (unlikely(!pd))
634                 return ERR_PTR(-ENOMEM);
635
636         if (unlikely(setup_px(vm, pd))) {
637                 kfree(pd);
638                 return ERR_PTR(-ENOMEM);
639         }
640
641         pd->used_pdes = 0;
642         return pd;
643 }
644
645 static void free_pd(struct i915_address_space *vm,
646                     struct i915_page_directory *pd)
647 {
648         cleanup_px(vm, pd);
649         kfree(pd);
650 }
651
652 static void gen8_initialize_pd(struct i915_address_space *vm,
653                                struct i915_page_directory *pd)
654 {
655         unsigned int i;
656
657         fill_px(vm, pd,
658                 gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
659         for (i = 0; i < I915_PDES; i++)
660                 pd->page_table[i] = vm->scratch_pt;
661 }
662
663 static int __pdp_init(struct i915_address_space *vm,
664                       struct i915_page_directory_pointer *pdp)
665 {
666         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
667         unsigned int i;
668
669         pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
670                                             GFP_KERNEL | __GFP_NOWARN);
671         if (unlikely(!pdp->page_directory))
672                 return -ENOMEM;
673
674         for (i = 0; i < pdpes; i++)
675                 pdp->page_directory[i] = vm->scratch_pd;
676
677         return 0;
678 }
679
680 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
681 {
682         kfree(pdp->page_directory);
683         pdp->page_directory = NULL;
684 }
685
686 static inline bool use_4lvl(const struct i915_address_space *vm)
687 {
688         return i915_vm_is_48bit(vm);
689 }
690
691 static struct i915_page_directory_pointer *
692 alloc_pdp(struct i915_address_space *vm)
693 {
694         struct i915_page_directory_pointer *pdp;
695         int ret = -ENOMEM;
696
697         WARN_ON(!use_4lvl(vm));
698
699         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
700         if (!pdp)
701                 return ERR_PTR(-ENOMEM);
702
703         ret = __pdp_init(vm, pdp);
704         if (ret)
705                 goto fail_bitmap;
706
707         ret = setup_px(vm, pdp);
708         if (ret)
709                 goto fail_page_m;
710
711         return pdp;
712
713 fail_page_m:
714         __pdp_fini(pdp);
715 fail_bitmap:
716         kfree(pdp);
717
718         return ERR_PTR(ret);
719 }
720
721 static void free_pdp(struct i915_address_space *vm,
722                      struct i915_page_directory_pointer *pdp)
723 {
724         __pdp_fini(pdp);
725
726         if (!use_4lvl(vm))
727                 return;
728
729         cleanup_px(vm, pdp);
730         kfree(pdp);
731 }
732
733 static void gen8_initialize_pdp(struct i915_address_space *vm,
734                                 struct i915_page_directory_pointer *pdp)
735 {
736         gen8_ppgtt_pdpe_t scratch_pdpe;
737
738         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
739
740         fill_px(vm, pdp, scratch_pdpe);
741 }
742
743 static void gen8_initialize_pml4(struct i915_address_space *vm,
744                                  struct i915_pml4 *pml4)
745 {
746         unsigned int i;
747
748         fill_px(vm, pml4,
749                 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
750         for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
751                 pml4->pdps[i] = vm->scratch_pdp;
752 }
753
754 /* Broadwell Page Directory Pointer Descriptors */
755 static int gen8_write_pdp(struct drm_i915_gem_request *req,
756                           unsigned entry,
757                           dma_addr_t addr)
758 {
759         struct intel_engine_cs *engine = req->engine;
760         u32 *cs;
761
762         BUG_ON(entry >= 4);
763
764         cs = intel_ring_begin(req, 6);
765         if (IS_ERR(cs))
766                 return PTR_ERR(cs);
767
768         *cs++ = MI_LOAD_REGISTER_IMM(1);
769         *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
770         *cs++ = upper_32_bits(addr);
771         *cs++ = MI_LOAD_REGISTER_IMM(1);
772         *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
773         *cs++ = lower_32_bits(addr);
774         intel_ring_advance(req, cs);
775
776         return 0;
777 }
778
779 static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
780                                struct drm_i915_gem_request *req)
781 {
782         int i, ret;
783
784         for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
785                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
786
787                 ret = gen8_write_pdp(req, i, pd_daddr);
788                 if (ret)
789                         return ret;
790         }
791
792         return 0;
793 }
794
795 static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt,
796                                struct drm_i915_gem_request *req)
797 {
798         return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
799 }
800
801 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
802  * the page table structures, we mark them dirty so that
803  * context switching/execlist queuing code takes extra steps
804  * to ensure that tlbs are flushed.
805  */
806 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
807 {
808         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask;
809 }
810
811 /* Removes entries from a single page table, releasing it if it's empty.
812  * Caller can use the return value to update higher-level entries.
813  */
814 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
815                                 struct i915_page_table *pt,
816                                 u64 start, u64 length)
817 {
818         unsigned int num_entries = gen8_pte_count(start, length);
819         unsigned int pte = gen8_pte_index(start);
820         unsigned int pte_end = pte + num_entries;
821         const gen8_pte_t scratch_pte =
822                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
823         gen8_pte_t *vaddr;
824
825         GEM_BUG_ON(num_entries > pt->used_ptes);
826
827         pt->used_ptes -= num_entries;
828         if (!pt->used_ptes)
829                 return true;
830
831         vaddr = kmap_atomic_px(pt);
832         while (pte < pte_end)
833                 vaddr[pte++] = scratch_pte;
834         kunmap_atomic(vaddr);
835
836         return false;
837 }
838
839 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
840                                struct i915_page_directory *pd,
841                                struct i915_page_table *pt,
842                                unsigned int pde)
843 {
844         gen8_pde_t *vaddr;
845
846         pd->page_table[pde] = pt;
847
848         vaddr = kmap_atomic_px(pd);
849         vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
850         kunmap_atomic(vaddr);
851 }
852
853 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
854                                 struct i915_page_directory *pd,
855                                 u64 start, u64 length)
856 {
857         struct i915_page_table *pt;
858         u32 pde;
859
860         gen8_for_each_pde(pt, pd, start, length, pde) {
861                 GEM_BUG_ON(pt == vm->scratch_pt);
862
863                 if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
864                         continue;
865
866                 gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
867                 GEM_BUG_ON(!pd->used_pdes);
868                 pd->used_pdes--;
869
870                 free_pt(vm, pt);
871         }
872
873         return !pd->used_pdes;
874 }
875
876 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
877                                 struct i915_page_directory_pointer *pdp,
878                                 struct i915_page_directory *pd,
879                                 unsigned int pdpe)
880 {
881         gen8_ppgtt_pdpe_t *vaddr;
882
883         pdp->page_directory[pdpe] = pd;
884         if (!use_4lvl(vm))
885                 return;
886
887         vaddr = kmap_atomic_px(pdp);
888         vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
889         kunmap_atomic(vaddr);
890 }
891
892 /* Removes entries from a single page dir pointer, releasing it if it's empty.
893  * Caller can use the return value to update higher-level entries.
894  */
895 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
896                                  struct i915_page_directory_pointer *pdp,
897                                  u64 start, u64 length)
898 {
899         struct i915_page_directory *pd;
900         unsigned int pdpe;
901
902         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
903                 GEM_BUG_ON(pd == vm->scratch_pd);
904
905                 if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
906                         continue;
907
908                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
909                 GEM_BUG_ON(!pdp->used_pdpes);
910                 pdp->used_pdpes--;
911
912                 free_pd(vm, pd);
913         }
914
915         return !pdp->used_pdpes;
916 }
917
918 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
919                                   u64 start, u64 length)
920 {
921         gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
922 }
923
924 static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
925                                  struct i915_page_directory_pointer *pdp,
926                                  unsigned int pml4e)
927 {
928         gen8_ppgtt_pml4e_t *vaddr;
929
930         pml4->pdps[pml4e] = pdp;
931
932         vaddr = kmap_atomic_px(pml4);
933         vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
934         kunmap_atomic(vaddr);
935 }
936
937 /* Removes entries from a single pml4.
938  * This is the top-level structure in 4-level page tables used on gen8+.
939  * Empty entries always point at the scratch pdp.
940  */
941 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
942                                   u64 start, u64 length)
943 {
944         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
945         struct i915_pml4 *pml4 = &ppgtt->pml4;
946         struct i915_page_directory_pointer *pdp;
947         unsigned int pml4e;
948
949         GEM_BUG_ON(!use_4lvl(vm));
950
951         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
952                 GEM_BUG_ON(pdp == vm->scratch_pdp);
953
954                 if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
955                         continue;
956
957                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
958
959                 free_pdp(vm, pdp);
960         }
961 }
962
963 struct sgt_dma {
964         struct scatterlist *sg;
965         dma_addr_t dma, max;
966 };
967
968 struct gen8_insert_pte {
969         u16 pml4e;
970         u16 pdpe;
971         u16 pde;
972         u16 pte;
973 };
974
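/*
 * gen8_insert_pte() splits a GTT offset into the usual 4-level (9+9+9+12)
 * indices: pml4e = bits [47:39], pdpe = [38:30], pde = [29:21],
 * pte = [20:12].  For example (a sketch, using the standard gen8 shifts),
 * start = 0x202000 (2M + 8K) decomposes to pml4e 0, pdpe 0, pde 1, pte 2.
 */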
975 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
976 {
977         return (struct gen8_insert_pte) {
978                  gen8_pml4e_index(start),
979                  gen8_pdpe_index(start),
980                  gen8_pde_index(start),
981                  gen8_pte_index(start),
982         };
983 }
984
985 static __always_inline bool
986 gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
987                               struct i915_page_directory_pointer *pdp,
988                               struct sgt_dma *iter,
989                               struct gen8_insert_pte *idx,
990                               enum i915_cache_level cache_level)
991 {
992         struct i915_page_directory *pd;
993         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
994         gen8_pte_t *vaddr;
995         bool ret;
996
997         GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
998         pd = pdp->page_directory[idx->pdpe];
999         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1000         do {
1001                 vaddr[idx->pte] = pte_encode | iter->dma;
1002
1003                 iter->dma += PAGE_SIZE;
1004                 if (iter->dma >= iter->max) {
1005                         iter->sg = __sg_next(iter->sg);
1006                         if (!iter->sg) {
1007                                 ret = false;
1008                                 break;
1009                         }
1010
1011                         iter->dma = sg_dma_address(iter->sg);
1012                         iter->max = iter->dma + iter->sg->length;
1013                 }
1014
1015                 if (++idx->pte == GEN8_PTES) {
1016                         idx->pte = 0;
1017
1018                         if (++idx->pde == I915_PDES) {
1019                                 idx->pde = 0;
1020
1021                                 /* Limited by sg length for 3lvl */
1022                                 if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
1023                                         idx->pdpe = 0;
1024                                         ret = true;
1025                                         break;
1026                                 }
1027
1028                                 GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
1029                                 pd = pdp->page_directory[idx->pdpe];
1030                         }
1031
1032                         kunmap_atomic(vaddr);
1033                         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1034                 }
1035         } while (1);
1036         kunmap_atomic(vaddr);
1037
1038         return ret;
1039 }
1040
1041 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1042                                    struct i915_vma *vma,
1043                                    enum i915_cache_level cache_level,
1044                                    u32 unused)
1045 {
1046         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1047         struct sgt_dma iter = {
1048                 .sg = vma->pages->sgl,
1049                 .dma = sg_dma_address(iter.sg),
1050                 .max = iter.dma + iter.sg->length,
1051         };
1052         struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1053
1054         gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
1055                                       cache_level);
1056 }
1057
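/*
 * Walk the scatterlist and, whenever the 2M page size is available and the
 * DMA address, remaining length and GTT offset are all 2M aligned, write a
 * single PDE with GEN8_PDE_PS_2M instead of 512 individual 4K PTEs;
 * otherwise fall back to filling the page table with 4K entries.
 */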
1058 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1059                                            struct i915_page_directory_pointer **pdps,
1060                                            struct sgt_dma *iter,
1061                                            enum i915_cache_level cache_level)
1062 {
1063         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
1064         u64 start = vma->node.start;
1065         dma_addr_t rem = iter->sg->length;
1066
1067         do {
1068                 struct gen8_insert_pte idx = gen8_insert_pte(start);
1069                 struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
1070                 struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
1071                 unsigned int page_size;
1072                 gen8_pte_t encode = pte_encode;
1073                 gen8_pte_t *vaddr;
1074                 u16 index, max;
1075
1076                 if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1077                     IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1078                     rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1079                         index = idx.pde;
1080                         max = I915_PDES;
1081                         page_size = I915_GTT_PAGE_SIZE_2M;
1082
1083                         encode |= GEN8_PDE_PS_2M;
1084
1085                         vaddr = kmap_atomic_px(pd);
1086                 } else {
1087                         struct i915_page_table *pt = pd->page_table[idx.pde];
1088
1089                         index = idx.pte;
1090                         max = GEN8_PTES;
1091                         page_size = I915_GTT_PAGE_SIZE;
1092
1093                         vaddr = kmap_atomic_px(pt);
1094                 }
1095
1096                 do {
1097                         GEM_BUG_ON(iter->sg->length < page_size);
1098                         vaddr[index++] = encode | iter->dma;
1099
1100                         start += page_size;
1101                         iter->dma += page_size;
1102                         rem -= page_size;
1103                         if (iter->dma >= iter->max) {
1104                                 iter->sg = __sg_next(iter->sg);
1105                                 if (!iter->sg)
1106                                         break;
1107
1108                                 rem = iter->sg->length;
1109                                 iter->dma = sg_dma_address(iter->sg);
1110                                 iter->max = iter->dma + rem;
1111
1112                                 if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1113                                         break;
1114                         }
1115                 } while (rem >= page_size && index < max);
1116
1117                 kunmap_atomic(vaddr);
1118         } while (iter->sg);
1119 }
1120
1121 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1122                                    struct i915_vma *vma,
1123                                    enum i915_cache_level cache_level,
1124                                    u32 unused)
1125 {
1126         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1127         struct sgt_dma iter = {
1128                 .sg = vma->pages->sgl,
1129                 .dma = sg_dma_address(iter.sg),
1130                 .max = iter.dma + iter.sg->length,
1131         };
1132         struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
1133
1134         if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1135                 gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
1136         } else {
1137                 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1138
1139                 while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
1140                                                      &iter, &idx, cache_level))
1141                         GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1142         }
1143 }
1144
1145 static void gen8_free_page_tables(struct i915_address_space *vm,
1146                                   struct i915_page_directory *pd)
1147 {
1148         int i;
1149
1150         if (!px_page(pd))
1151                 return;
1152
1153         for (i = 0; i < I915_PDES; i++) {
1154                 if (pd->page_table[i] != vm->scratch_pt)
1155                         free_pt(vm, pd->page_table[i]);
1156         }
1157 }
1158
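/*
 * Build the scratch hierarchy: a (4K or 64K) scratch page, a scratch page
 * table whose PTEs all point at it, a scratch page directory pointing at
 * that table and, for 48b VMs, a scratch PDP.  Unused parts of the address
 * space are left pointing at these, so stray reads hit the scratch page.
 */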
1159 static int gen8_init_scratch(struct i915_address_space *vm)
1160 {
1161         int ret;
1162
1163         ret = setup_scratch_page(vm, I915_GFP_DMA);
1164         if (ret)
1165                 return ret;
1166
1167         vm->scratch_pt = alloc_pt(vm);
1168         if (IS_ERR(vm->scratch_pt)) {
1169                 ret = PTR_ERR(vm->scratch_pt);
1170                 goto free_scratch_page;
1171         }
1172
1173         vm->scratch_pd = alloc_pd(vm);
1174         if (IS_ERR(vm->scratch_pd)) {
1175                 ret = PTR_ERR(vm->scratch_pd);
1176                 goto free_pt;
1177         }
1178
1179         if (use_4lvl(vm)) {
1180                 vm->scratch_pdp = alloc_pdp(vm);
1181                 if (IS_ERR(vm->scratch_pdp)) {
1182                         ret = PTR_ERR(vm->scratch_pdp);
1183                         goto free_pd;
1184                 }
1185         }
1186
1187         gen8_initialize_pt(vm, vm->scratch_pt);
1188         gen8_initialize_pd(vm, vm->scratch_pd);
1189         if (use_4lvl(vm))
1190                 gen8_initialize_pdp(vm, vm->scratch_pdp);
1191
1192         return 0;
1193
1194 free_pd:
1195         free_pd(vm, vm->scratch_pd);
1196 free_pt:
1197         free_pt(vm, vm->scratch_pt);
1198 free_scratch_page:
1199         cleanup_scratch_page(vm);
1200
1201         return ret;
1202 }
1203
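/*
 * Tell the GVT-g host about PPGTT creation/destruction by writing the
 * page-directory (or PML4) DMA addresses and a g2v notification message
 * into the vgtif MMIO scratch registers.
 */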
1204 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1205 {
1206         struct i915_address_space *vm = &ppgtt->base;
1207         struct drm_i915_private *dev_priv = vm->i915;
1208         enum vgt_g2v_type msg;
1209         int i;
1210
1211         if (use_4lvl(vm)) {
1212                 const u64 daddr = px_dma(&ppgtt->pml4);
1213
1214                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1215                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1216
1217                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1218                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1219         } else {
1220                 for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1221                         const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1222
1223                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1224                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1225                 }
1226
1227                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1228                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1229         }
1230
1231         I915_WRITE(vgtif_reg(g2v_notify), msg);
1232
1233         return 0;
1234 }
1235
1236 static void gen8_free_scratch(struct i915_address_space *vm)
1237 {
1238         if (use_4lvl(vm))
1239                 free_pdp(vm, vm->scratch_pdp);
1240         free_pd(vm, vm->scratch_pd);
1241         free_pt(vm, vm->scratch_pt);
1242         cleanup_scratch_page(vm);
1243 }
1244
1245 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1246                                     struct i915_page_directory_pointer *pdp)
1247 {
1248         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1249         int i;
1250
1251         for (i = 0; i < pdpes; i++) {
1252                 if (pdp->page_directory[i] == vm->scratch_pd)
1253                         continue;
1254
1255                 gen8_free_page_tables(vm, pdp->page_directory[i]);
1256                 free_pd(vm, pdp->page_directory[i]);
1257         }
1258
1259         free_pdp(vm, pdp);
1260 }
1261
1262 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1263 {
1264         int i;
1265
1266         for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1267                 if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
1268                         continue;
1269
1270                 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
1271         }
1272
1273         cleanup_px(&ppgtt->base, &ppgtt->pml4);
1274 }
1275
1276 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1277 {
1278         struct drm_i915_private *dev_priv = vm->i915;
1279         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1280
1281         if (intel_vgpu_active(dev_priv))
1282                 gen8_ppgtt_notify_vgt(ppgtt, false);
1283
1284         if (use_4lvl(vm))
1285                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1286         else
1287                 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
1288
1289         gen8_free_scratch(vm);
1290 }
1291
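/*
 * Allocate any missing page tables covering [start, start + length),
 * replacing scratch_pt entries as we go and bumping the use counts.  On
 * allocation failure the partially populated range is unwound via
 * gen8_ppgtt_clear_pd().
 */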
1292 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1293                                struct i915_page_directory *pd,
1294                                u64 start, u64 length)
1295 {
1296         struct i915_page_table *pt;
1297         u64 from = start;
1298         unsigned int pde;
1299
1300         gen8_for_each_pde(pt, pd, start, length, pde) {
1301                 int count = gen8_pte_count(start, length);
1302
1303                 if (pt == vm->scratch_pt) {
1304                         pt = alloc_pt(vm);
1305                         if (IS_ERR(pt))
1306                                 goto unwind;
1307
1308                         if (count < GEN8_PTES)
1309                                 gen8_initialize_pt(vm, pt);
1310
1311                         gen8_ppgtt_set_pde(vm, pd, pt, pde);
1312                         pd->used_pdes++;
1313                         GEM_BUG_ON(pd->used_pdes > I915_PDES);
1314                 }
1315
1316                 pt->used_ptes += count;
1317         }
1318         return 0;
1319
1320 unwind:
1321         gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1322         return -ENOMEM;
1323 }
1324
1325 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1326                                 struct i915_page_directory_pointer *pdp,
1327                                 u64 start, u64 length)
1328 {
1329         struct i915_page_directory *pd;
1330         u64 from = start;
1331         unsigned int pdpe;
1332         int ret;
1333
1334         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1335                 if (pd == vm->scratch_pd) {
1336                         pd = alloc_pd(vm);
1337                         if (IS_ERR(pd))
1338                                 goto unwind;
1339
1340                         gen8_initialize_pd(vm, pd);
1341                         gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1342                         pdp->used_pdpes++;
1343                         GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
1344
1345                         mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
1346                 }
1347
1348                 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1349                 if (unlikely(ret))
1350                         goto unwind_pd;
1351         }
1352
1353         return 0;
1354
1355 unwind_pd:
1356         if (!pd->used_pdes) {
1357                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1358                 GEM_BUG_ON(!pdp->used_pdpes);
1359                 pdp->used_pdpes--;
1360                 free_pd(vm, pd);
1361         }
1362 unwind:
1363         gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1364         return -ENOMEM;
1365 }
1366
1367 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1368                                  u64 start, u64 length)
1369 {
1370         return gen8_ppgtt_alloc_pdp(vm,
1371                                     &i915_vm_to_ppgtt(vm)->pdp, start, length);
1372 }
1373
1374 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1375                                  u64 start, u64 length)
1376 {
1377         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1378         struct i915_pml4 *pml4 = &ppgtt->pml4;
1379         struct i915_page_directory_pointer *pdp;
1380         u64 from = start;
1381         u32 pml4e;
1382         int ret;
1383
1384         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1385                 if (pml4->pdps[pml4e] == vm->scratch_pdp) {
1386                         pdp = alloc_pdp(vm);
1387                         if (IS_ERR(pdp))
1388                                 goto unwind;
1389
1390                         gen8_initialize_pdp(vm, pdp);
1391                         gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1392                 }
1393
1394                 ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1395                 if (unlikely(ret))
1396                         goto unwind_pdp;
1397         }
1398
1399         return 0;
1400
1401 unwind_pdp:
1402         if (!pdp->used_pdpes) {
1403                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1404                 free_pdp(vm, pdp);
1405         }
1406 unwind:
1407         gen8_ppgtt_clear_4lvl(vm, from, start - from);
1408         return -ENOMEM;
1409 }
1410
1411 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
1412                           struct i915_page_directory_pointer *pdp,
1413                           u64 start, u64 length,
1414                           gen8_pte_t scratch_pte,
1415                           struct seq_file *m)
1416 {
1417         struct i915_address_space *vm = &ppgtt->base;
1418         struct i915_page_directory *pd;
1419         u32 pdpe;
1420
1421         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1422                 struct i915_page_table *pt;
1423                 u64 pd_len = length;
1424                 u64 pd_start = start;
1425                 u32 pde;
1426
1427                 if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
1428                         continue;
1429
1430                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1431                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1432                         u32 pte;
1433                         gen8_pte_t *pt_vaddr;
1434
1435                         if (pd->page_table[pde] == ppgtt->base.scratch_pt)
1436                                 continue;
1437
1438                         pt_vaddr = kmap_atomic_px(pt);
1439                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1440                                 u64 va = (pdpe << GEN8_PDPE_SHIFT |
1441                                           pde << GEN8_PDE_SHIFT |
1442                                           pte << GEN8_PTE_SHIFT);
1443                                 int i;
1444                                 bool found = false;
1445
1446                                 for (i = 0; i < 4; i++)
1447                                         if (pt_vaddr[pte + i] != scratch_pte)
1448                                                 found = true;
1449                                 if (!found)
1450                                         continue;
1451
1452                                 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1453                                 for (i = 0; i < 4; i++) {
1454                                         if (pt_vaddr[pte + i] != scratch_pte)
1455                                                 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1456                                         else
1457                                                 seq_puts(m, "  SCRATCH ");
1458                                 }
1459                                 seq_puts(m, "\n");
1460                         }
1461                         kunmap_atomic(pt_vaddr);
1462                 }
1463         }
1464 }
1465
1466 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1467 {
1468         struct i915_address_space *vm = &ppgtt->base;
1469         const gen8_pte_t scratch_pte =
1470                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
1471         u64 start = 0, length = ppgtt->base.total;
1472
1473         if (use_4lvl(vm)) {
1474                 u64 pml4e;
1475                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1476                 struct i915_page_directory_pointer *pdp;
1477
1478                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1479                         if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
1480                                 continue;
1481
1482                         seq_printf(m, "    PML4E #%llu\n", pml4e);
1483                         gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
1484                 }
1485         } else {
1486                 gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
1487         }
1488 }
1489
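/*
 * With GVT-g the guest must keep all top-level PDP entries populated, so
 * preallocate every page directory up front and take an extra used_pdpes
 * reference ("never remove") so they are not freed on clear.
 */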
1490 static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1491 {
1492         struct i915_address_space *vm = &ppgtt->base;
1493         struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
1494         struct i915_page_directory *pd;
1495         u64 start = 0, length = ppgtt->base.total;
1496         u64 from = start;
1497         unsigned int pdpe;
1498
1499         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1500                 pd = alloc_pd(vm);
1501                 if (IS_ERR(pd))
1502                         goto unwind;
1503
1504                 gen8_initialize_pd(vm, pd);
1505                 gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1506                 pdp->used_pdpes++;
1507         }
1508
1509         pdp->used_pdpes++; /* never remove */
1510         return 0;
1511
1512 unwind:
1513         start -= from;
1514         gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1515                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1516                 free_pd(vm, pd);
1517         }
1518         pdp->used_pdpes = 0;
1519         return -ENOMEM;
1520 }
1521
1522 /*
1523  * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP
1524  * registers, with a net effect resembling a 2-level page table in normal x86
1525  * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
1526  * legacy 32b address space.
1527  *
1528  */
1529 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1530 {
1531         struct i915_address_space *vm = &ppgtt->base;
1532         struct drm_i915_private *dev_priv = vm->i915;
1533         int ret;
1534
1535         ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ?
1536                 1ULL << 48 :
1537                 1ULL << 32;
1538
1539         /* There are only a few exceptions for gen >= 6: chv and bxt.
1540          * And we are not sure about the latter, so play safe for now.
1541          */
1542         if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
1543                 ppgtt->base.pt_kmap_wc = true;
1544
1545         ret = gen8_init_scratch(&ppgtt->base);
1546         if (ret) {
1547                 ppgtt->base.total = 0;
1548                 return ret;
1549         }
1550
1551         if (use_4lvl(vm)) {
1552                 ret = setup_px(&ppgtt->base, &ppgtt->pml4);
1553                 if (ret)
1554                         goto free_scratch;
1555
1556                 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1557
1558                 ppgtt->switch_mm = gen8_mm_switch_4lvl;
1559                 ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1560                 ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
1561                 ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
1562         } else {
1563                 ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
1564                 if (ret)
1565                         goto free_scratch;
1566
1567                 if (intel_vgpu_active(dev_priv)) {
1568                         ret = gen8_preallocate_top_level_pdp(ppgtt);
1569                         if (ret) {
1570                                 __pdp_fini(&ppgtt->pdp);
1571                                 goto free_scratch;
1572                         }
1573                 }
1574
1575                 ppgtt->switch_mm = gen8_mm_switch_3lvl;
1576                 ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1577                 ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
1578                 ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
1579         }
1580
1581         if (intel_vgpu_active(dev_priv))
1582                 gen8_ppgtt_notify_vgt(ppgtt, true);
1583
1584         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1585         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1586         ppgtt->base.bind_vma = ppgtt_bind_vma;
1587         ppgtt->base.set_pages = ppgtt_set_pages;
1588         ppgtt->base.clear_pages = clear_pages;
1589         ppgtt->debug_dump = gen8_dump_ppgtt;
1590
1591         return 0;
1592
1593 free_scratch:
1594         gen8_free_scratch(&ppgtt->base);
1595         return ret;
1596 }
1597
1598 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1599 {
1600         struct i915_address_space *vm = &ppgtt->base;
1601         struct i915_page_table *unused;
1602         gen6_pte_t scratch_pte;
1603         u32 pd_entry, pte, pde;
1604         u32 start = 0, length = ppgtt->base.total;
1605
1606         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1607                                      I915_CACHE_LLC, 0);
1608
1609         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1610                 u32 expected;
1611                 gen6_pte_t *pt_vaddr;
1612                 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1613                 pd_entry = readl(ppgtt->pd_addr + pde);
1614                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1615
1616                 if (pd_entry != expected)
1617                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1618                                    pde,
1619                                    pd_entry,
1620                                    expected);
1621                 seq_printf(m, "\tPDE: %x\n", pd_entry);
1622
1623                 pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);
1624
1625                 for (pte = 0; pte < GEN6_PTES; pte += 4) {
1626                         unsigned long va =
1627                                 (pde * PAGE_SIZE * GEN6_PTES) +
1628                                 (pte * PAGE_SIZE);
1629                         int i;
1630                         bool found = false;
1631                         for (i = 0; i < 4; i++)
1632                                 if (pt_vaddr[pte + i] != scratch_pte)
1633                                         found = true;
1634                         if (!found)
1635                                 continue;
1636
1637                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1638                         for (i = 0; i < 4; i++) {
1639                                 if (pt_vaddr[pte + i] != scratch_pte)
1640                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1641                                 else
1642                                         seq_puts(m, "  SCRATCH ");
1643                         }
1644                         seq_puts(m, "\n");
1645                 }
1646                 kunmap_atomic(pt_vaddr);
1647         }
1648 }
1649
1650 /* Write the PDE at index @pde so that it points at the page table @pt */
1651 static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
1652                                   const unsigned int pde,
1653                                   const struct i915_page_table *pt)
1654 {
1655         /* Caller needs to make sure the write completes if necessary */
1656         writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1657                        ppgtt->pd_addr + pde);
1658 }
1659
1660 /* Write out every page table in the ppgtt's page directory to its
1661  * corresponding (incrementing) PDE slot. */
1662 static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
1663                                   u32 start, u32 length)
1664 {
1665         struct i915_page_table *pt;
1666         unsigned int pde;
1667
1668         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
1669                 gen6_write_pde(ppgtt, pde, pt);
1670
1671         mark_tlbs_dirty(ppgtt);
1672         wmb();
1673 }
1674
1675 static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1676 {
1677         GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1678         return ppgtt->pd.base.ggtt_offset << 10;
1679 }
1680
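/*
 * The mm switch helpers below point an engine at this ppgtt by loading
 * PP_DIR_DCLV and PP_DIR_BASE: the hsw/gen7 variants emit an
 * MI_LOAD_REGISTER_IMM from the ring, while the gen6 variant writes the
 * registers directly via MMIO.
 */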
1681 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1682                          struct drm_i915_gem_request *req)
1683 {
1684         struct intel_engine_cs *engine = req->engine;
1685         u32 *cs;
1686
1687         /* NB: TLBs must be flushed and invalidated before a switch */
1688         cs = intel_ring_begin(req, 6);
1689         if (IS_ERR(cs))
1690                 return PTR_ERR(cs);
1691
1692         *cs++ = MI_LOAD_REGISTER_IMM(2);
1693         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1694         *cs++ = PP_DIR_DCLV_2G;
1695         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1696         *cs++ = get_pd_offset(ppgtt);
1697         *cs++ = MI_NOOP;
1698         intel_ring_advance(req, cs);
1699
1700         return 0;
1701 }
1702
1703 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1704                           struct drm_i915_gem_request *req)
1705 {
1706         struct intel_engine_cs *engine = req->engine;
1707         u32 *cs;
1708
1709         /* NB: TLBs must be flushed and invalidated before a switch */
1710         cs = intel_ring_begin(req, 6);
1711         if (IS_ERR(cs))
1712                 return PTR_ERR(cs);
1713
1714         *cs++ = MI_LOAD_REGISTER_IMM(2);
1715         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1716         *cs++ = PP_DIR_DCLV_2G;
1717         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1718         *cs++ = get_pd_offset(ppgtt);
1719         *cs++ = MI_NOOP;
1720         intel_ring_advance(req, cs);
1721
1722         return 0;
1723 }
1724
1725 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1726                           struct drm_i915_gem_request *req)
1727 {
1728         struct intel_engine_cs *engine = req->engine;
1729         struct drm_i915_private *dev_priv = req->i915;
1730
1731         I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1732         I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1733         return 0;
1734 }
1735
1736 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
1737 {
1738         struct intel_engine_cs *engine;
1739         enum intel_engine_id id;
1740
1741         for_each_engine(engine, dev_priv, id) {
1742                 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
1743                                  GEN8_GFX_PPGTT_48B : 0;
1744                 I915_WRITE(RING_MODE_GEN7(engine),
1745                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1746         }
1747 }
1748
1749 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1750 {
1751         struct intel_engine_cs *engine;
1752         u32 ecochk, ecobits;
1753         enum intel_engine_id id;
1754
1755         ecobits = I915_READ(GAC_ECO_BITS);
1756         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1757
1758         ecochk = I915_READ(GAM_ECOCHK);
1759         if (IS_HASWELL(dev_priv)) {
1760                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1761         } else {
1762                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1763                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1764         }
1765         I915_WRITE(GAM_ECOCHK, ecochk);
1766
1767         for_each_engine(engine, dev_priv, id) {
1768                 /* GFX_MODE is per-ring on gen7+ */
1769                 I915_WRITE(RING_MODE_GEN7(engine),
1770                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1771         }
1772 }
1773
1774 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1775 {
1776         u32 ecochk, gab_ctl, ecobits;
1777
1778         ecobits = I915_READ(GAC_ECO_BITS);
1779         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1780                    ECOBITS_PPGTT_CACHE64B);
1781
1782         gab_ctl = I915_READ(GAB_CTL);
1783         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1784
1785         ecochk = I915_READ(GAM_ECOCHK);
1786         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1787
1788         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1789 }
1790
1791 /* PPGTT support for Sandybridge/Gen6 and later */
1792 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1793                                    u64 start, u64 length)
1794 {
1795         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1796         unsigned int first_entry = start >> PAGE_SHIFT;
1797         unsigned int pde = first_entry / GEN6_PTES;
1798         unsigned int pte = first_entry % GEN6_PTES;
1799         unsigned int num_entries = length >> PAGE_SHIFT;
1800         gen6_pte_t scratch_pte =
1801                 vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1802
1803         while (num_entries) {
1804                 struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
1805                 unsigned int end = min(pte + num_entries, GEN6_PTES);
1806                 gen6_pte_t *vaddr;
1807
1808                 num_entries -= end - pte;
1809
1810                 /* Note that the hw doesn't support removing PDEs on the fly
1811                  * (they are cached inside the context with no means to
1812                  * invalidate the cache), so we can only reset the PTE
1813                  * entries back to scratch.
1814                  */
1815
1816                 vaddr = kmap_atomic_px(pt);
1817                 do {
1818                         vaddr[pte++] = scratch_pte;
1819                 } while (pte < end);
1820                 kunmap_atomic(vaddr);
1821
1822                 pte = 0;
1823         }
1824 }
1825
1826 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1827                                       struct i915_vma *vma,
1828                                       enum i915_cache_level cache_level,
1829                                       u32 flags)
1830 {
1831         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1832         unsigned first_entry = vma->node.start >> PAGE_SHIFT;
1833         unsigned act_pt = first_entry / GEN6_PTES;
1834         unsigned act_pte = first_entry % GEN6_PTES;
1835         const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1836         struct sgt_dma iter;
1837         gen6_pte_t *vaddr;
1838
1839         vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
1840         iter.sg = vma->pages->sgl;
1841         iter.dma = sg_dma_address(iter.sg);
1842         iter.max = iter.dma + iter.sg->length;
1843         do {
1844                 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1845
1846                 iter.dma += PAGE_SIZE;
1847                 if (iter.dma == iter.max) {
1848                         iter.sg = __sg_next(iter.sg);
1849                         if (!iter.sg)
1850                                 break;
1851
1852                         iter.dma = sg_dma_address(iter.sg);
1853                         iter.max = iter.dma + iter.sg->length;
1854                 }
1855
1856                 if (++act_pte == GEN6_PTES) {
1857                         kunmap_atomic(vaddr);
1858                         vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
1859                         act_pte = 0;
1860                 }
1861         } while (1);
1862         kunmap_atomic(vaddr);
1863 }
1864
1865 static int gen6_alloc_va_range(struct i915_address_space *vm,
1866                                u64 start, u64 length)
1867 {
1868         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1869         struct i915_page_table *pt;
1870         u64 from = start;
1871         unsigned int pde;
1872         bool flush = false;
1873
1874         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1875                 if (pt == vm->scratch_pt) {
1876                         pt = alloc_pt(vm);
1877                         if (IS_ERR(pt))
1878                                 goto unwind_out;
1879
1880                         gen6_initialize_pt(vm, pt);
1881                         ppgtt->pd.page_table[pde] = pt;
1882                         gen6_write_pde(ppgtt, pde, pt);
1883                         flush = true;
1884                 }
1885         }
1886
1887         if (flush) {
1888                 mark_tlbs_dirty(ppgtt);
1889                 wmb();
1890         }
1891
1892         return 0;
1893
1894 unwind_out:
1895         gen6_ppgtt_clear_range(vm, from, start);
1896         return -ENOMEM;
1897 }
1898
1899 static int gen6_init_scratch(struct i915_address_space *vm)
1900 {
1901         int ret;
1902
1903         ret = setup_scratch_page(vm, I915_GFP_DMA);
1904         if (ret)
1905                 return ret;
1906
1907         vm->scratch_pt = alloc_pt(vm);
1908         if (IS_ERR(vm->scratch_pt)) {
1909                 cleanup_scratch_page(vm);
1910                 return PTR_ERR(vm->scratch_pt);
1911         }
1912
1913         gen6_initialize_pt(vm, vm->scratch_pt);
1914
1915         return 0;
1916 }
1917
1918 static void gen6_free_scratch(struct i915_address_space *vm)
1919 {
1920         free_pt(vm, vm->scratch_pt);
1921         cleanup_scratch_page(vm);
1922 }
1923
1924 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1925 {
1926         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1927         struct i915_page_directory *pd = &ppgtt->pd;
1928         struct i915_page_table *pt;
1929         u32 pde;
1930
1931         drm_mm_remove_node(&ppgtt->node);
1932
1933         gen6_for_all_pdes(pt, pd, pde)
1934                 if (pt != vm->scratch_pt)
1935                         free_pt(vm, pt);
1936
1937         gen6_free_scratch(vm);
1938 }
1939
1940 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1941 {
1942         struct i915_address_space *vm = &ppgtt->base;
1943         struct drm_i915_private *dev_priv = ppgtt->base.i915;
1944         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1945         int ret;
1946
1947         /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1948          * allocator works in address space sizes, so it's multiplied by page
1949          * size. We allocate at the top of the GTT to avoid fragmentation.
1950          */
1951         BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
1952
1953         ret = gen6_init_scratch(vm);
1954         if (ret)
1955                 return ret;
1956
1957         ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
1958                                   GEN6_PD_SIZE, GEN6_PD_ALIGN,
1959                                   I915_COLOR_UNEVICTABLE,
1960                                   0, ggtt->base.total,
1961                                   PIN_HIGH);
1962         if (ret)
1963                 goto err_out;
1964
1965         if (ppgtt->node.start < ggtt->mappable_end)
1966                 DRM_DEBUG("Forced to use aperture for PDEs\n");
1967
1968         ppgtt->pd.base.ggtt_offset =
1969                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
1970
1971         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
1972                 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
1973
1974         return 0;
1975
1976 err_out:
1977         gen6_free_scratch(vm);
1978         return ret;
1979 }
1980
1981 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1982 {
1983         return gen6_ppgtt_allocate_page_directories(ppgtt);
1984 }
1985
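/* Point every PDE in the given range back at the scratch page table. */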
1986 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
1987                                   u64 start, u64 length)
1988 {
1989         struct i915_page_table *unused;
1990         u32 pde;
1991
1992         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
1993                 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
1994 }
1995
1996 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1997 {
1998         struct drm_i915_private *dev_priv = ppgtt->base.i915;
1999         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2000         int ret;
2001
2002         ppgtt->base.pte_encode = ggtt->base.pte_encode;
2003         if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
2004                 ppgtt->switch_mm = gen6_mm_switch;
2005         else if (IS_HASWELL(dev_priv))
2006                 ppgtt->switch_mm = hsw_mm_switch;
2007         else if (IS_GEN7(dev_priv))
2008                 ppgtt->switch_mm = gen7_mm_switch;
2009         else
2010                 BUG();
2011
2012         ret = gen6_ppgtt_alloc(ppgtt);
2013         if (ret)
2014                 return ret;
2015
2016         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2017
2018         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2019         gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
2020
2021         ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
2022         if (ret) {
2023                 gen6_ppgtt_cleanup(&ppgtt->base);
2024                 return ret;
2025         }
2026
2027         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2028         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2029         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2030         ppgtt->base.bind_vma = ppgtt_bind_vma;
2031         ppgtt->base.set_pages = ppgtt_set_pages;
2032         ppgtt->base.clear_pages = clear_pages;
2033         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2034         ppgtt->debug_dump = gen6_dump_ppgtt;
2035
2036         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2037                          ppgtt->node.size >> 20,
2038                          ppgtt->node.start / PAGE_SIZE);
2039
2040         DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
2041                          ppgtt->pd.base.ggtt_offset << 10);
2042
2043         return 0;
2044 }
2045
2046 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2047                            struct drm_i915_private *dev_priv)
2048 {
2049         ppgtt->base.i915 = dev_priv;
2050         ppgtt->base.dma = &dev_priv->drm.pdev->dev;
2051
2052         if (INTEL_INFO(dev_priv)->gen < 8)
2053                 return gen6_ppgtt_init(ppgtt);
2054         else
2055                 return gen8_ppgtt_init(ppgtt);
2056 }
2057
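/*
 * Common bring-up for any i915 address space: a timeline for tracking
 * activity, a drm_mm range manager covering [0, total), the vma lists,
 * and a pagevec used to batch the release of page-table pages.
 */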
2058 static void i915_address_space_init(struct i915_address_space *vm,
2059                                     struct drm_i915_private *dev_priv,
2060                                     const char *name)
2061 {
2062         i915_gem_timeline_init(dev_priv, &vm->timeline, name);
2063
2064         drm_mm_init(&vm->mm, 0, vm->total);
2065         vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
2066
2067         INIT_LIST_HEAD(&vm->active_list);
2068         INIT_LIST_HEAD(&vm->inactive_list);
2069         INIT_LIST_HEAD(&vm->unbound_list);
2070
2071         list_add_tail(&vm->global_link, &dev_priv->vm_list);
2072         pagevec_init(&vm->free_pages, false);
2073 }
2074
2075 static void i915_address_space_fini(struct i915_address_space *vm)
2076 {
2077         if (pagevec_count(&vm->free_pages))
2078                 vm_free_pages_release(vm, true);
2079
2080         i915_gem_timeline_fini(&vm->timeline);
2081         drm_mm_takedown(&vm->mm);
2082         list_del(&vm->global_link);
2083 }
2084
2085 static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2086 {
2087         /* This function is for GTT-related workarounds. It is called on
2088          * driver load and after a GPU reset, so you can place workarounds
2089          * here even if they get overwritten by a GPU reset.
2090          */
2091         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
2092         if (IS_BROADWELL(dev_priv))
2093                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2094         else if (IS_CHERRYVIEW(dev_priv))
2095                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2096         else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
2097                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2098         else if (IS_GEN9_LP(dev_priv))
2099                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2100
2101         /*
2102          * To support 64K PTEs we need to first enable the use of the
2103          * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
2104          * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2105          * shouldn't be needed after GEN10.
2106          *
2107          * 64K pages were first introduced with BDW, although technically they
2108          * only *work* from gen9+. For pre-BDW we instead have the option of
2109          * 32K pages, but we don't currently have any support for them in our
2110          * driver.
2111          */
2112         if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2113             INTEL_GEN(dev_priv) <= 10)
2114                 I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2115                            I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2116                            GAMW_ECO_ENABLE_64K_IPS_FIELD);
2117 }
2118
2119 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
2120 {
2121         gtt_write_workarounds(dev_priv);
2122
2123         /* In the case of execlists, PPGTT is enabled by the context descriptor
2124          * and the PDPs are contained within the context itself.  We don't
2125          * need to do anything here. */
2126         if (i915_modparams.enable_execlists)
2127                 return 0;
2128
2129         if (!USES_PPGTT(dev_priv))
2130                 return 0;
2131
2132         if (IS_GEN6(dev_priv))
2133                 gen6_ppgtt_enable(dev_priv);
2134         else if (IS_GEN7(dev_priv))
2135                 gen7_ppgtt_enable(dev_priv);
2136         else if (INTEL_GEN(dev_priv) >= 8)
2137                 gen8_ppgtt_enable(dev_priv);
2138         else
2139                 MISSING_CASE(INTEL_GEN(dev_priv));
2140
2141         return 0;
2142 }
2143
2144 struct i915_hw_ppgtt *
2145 i915_ppgtt_create(struct drm_i915_private *dev_priv,
2146                   struct drm_i915_file_private *fpriv,
2147                   const char *name)
2148 {
2149         struct i915_hw_ppgtt *ppgtt;
2150         int ret;
2151
2152         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2153         if (!ppgtt)
2154                 return ERR_PTR(-ENOMEM);
2155
2156         ret = __hw_ppgtt_init(ppgtt, dev_priv);
2157         if (ret) {
2158                 kfree(ppgtt);
2159                 return ERR_PTR(ret);
2160         }
2161
2162         kref_init(&ppgtt->ref);
2163         i915_address_space_init(&ppgtt->base, dev_priv, name);
2164         ppgtt->base.file = fpriv;
2165
2166         trace_i915_ppgtt_create(&ppgtt->base);
2167
2168         return ppgtt;
2169 }
2170
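/*
 * Teardown sketch (illustrative only): i915_ppgtt_close() marks the
 * address space as closed and closes any vmas still in it; dropping the
 * last reference with i915_ppgtt_put() then ends up in
 * i915_ppgtt_release() below, which invokes the ->cleanup() hook
 * installed by gen6/gen8_ppgtt_init().
 */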
2171 void i915_ppgtt_close(struct i915_address_space *vm)
2172 {
2173         struct list_head *phases[] = {
2174                 &vm->active_list,
2175                 &vm->inactive_list,
2176                 &vm->unbound_list,
2177                 NULL,
2178         }, **phase;
2179
2180         GEM_BUG_ON(vm->closed);
2181         vm->closed = true;
2182
2183         for (phase = phases; *phase; phase++) {
2184                 struct i915_vma *vma, *vn;
2185
2186                 list_for_each_entry_safe(vma, vn, *phase, vm_link)
2187                         if (!i915_vma_is_closed(vma))
2188                                 i915_vma_close(vma);
2189         }
2190 }
2191
2192 void i915_ppgtt_release(struct kref *kref)
2193 {
2194         struct i915_hw_ppgtt *ppgtt =
2195                 container_of(kref, struct i915_hw_ppgtt, ref);
2196
2197         trace_i915_ppgtt_release(&ppgtt->base);
2198
2199         /* vmas should already be unbound and destroyed */
2200         WARN_ON(!list_empty(&ppgtt->base.active_list));
2201         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2202         WARN_ON(!list_empty(&ppgtt->base.unbound_list));
2203
2204         ppgtt->base.cleanup(&ppgtt->base);
2205         i915_address_space_fini(&ppgtt->base);
2206         kfree(ppgtt);
2207 }
2208
2209 /* Certain Gen5 chipsets require idling the GPU before
2210  * unmapping anything from the GTT when VT-d is enabled.
2211  */
2212 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2213 {
2214         /* Query intel_iommu to see if we need the workaround. Presumably that
2215          * was loaded first.
2216          */
2217         return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
2218 }
2219
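/* Scan the per-engine fault registers, log and clear any stale faults. */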
2220 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2221 {
2222         struct intel_engine_cs *engine;
2223         enum intel_engine_id id;
2224
2225         if (INTEL_INFO(dev_priv)->gen < 6)
2226                 return;
2227
2228         for_each_engine(engine, dev_priv, id) {
2229                 u32 fault_reg;
2230                 fault_reg = I915_READ(RING_FAULT_REG(engine));
2231                 if (fault_reg & RING_FAULT_VALID) {
2232                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2233                                          "\tAddr: 0x%08lx\n"
2234                                          "\tAddress space: %s\n"
2235                                          "\tSource ID: %d\n"
2236                                          "\tType: %d\n",
2237                                          fault_reg & PAGE_MASK,
2238                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2239                                          RING_FAULT_SRCID(fault_reg),
2240                                          RING_FAULT_FAULT_TYPE(fault_reg));
2241                         I915_WRITE(RING_FAULT_REG(engine),
2242                                    fault_reg & ~RING_FAULT_VALID);
2243                 }
2244         }
2245
2246         /* Engine specific init may not have been done till this point. */
2247         if (dev_priv->engine[RCS])
2248                 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2249 }
2250
2251 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2252 {
2253         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2254
2255         /* Don't bother messing with faults pre GEN6 as we have little
2256          * documentation supporting that it's a good idea.
2257          */
2258         if (INTEL_GEN(dev_priv) < 6)
2259                 return;
2260
2261         i915_check_and_clear_faults(dev_priv);
2262
2263         ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
2264
2265         i915_ggtt_invalidate(dev_priv);
2266 }
2267
2268 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2269                                struct sg_table *pages)
2270 {
2271         do {
2272                 if (dma_map_sg(&obj->base.dev->pdev->dev,
2273                                pages->sgl, pages->nents,
2274                                PCI_DMA_BIDIRECTIONAL))
2275                         return 0;
2276
2277                 /* If the DMA remap fails, one cause can be that we have
2278                  * too many objects pinned in a small remapping table,
2279                  * such as swiotlb. Incrementally purge all other objects and
2280                  * try again - if there are no more pages to remove from
2281                  * the DMA remapper, i915_gem_shrink will return 0.
2282                  */
2283                 GEM_BUG_ON(obj->mm.pages == pages);
2284         } while (i915_gem_shrink(to_i915(obj->base.dev),
2285                                  obj->base.size >> PAGE_SHIFT, NULL,
2286                                  I915_SHRINK_BOUND |
2287                                  I915_SHRINK_UNBOUND |
2288                                  I915_SHRINK_ACTIVE));
2289
2290         return -ENOSPC;
2291 }
2292
2293 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2294 {
2295         writeq(pte, addr);
2296 }
2297
2298 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2299                                   dma_addr_t addr,
2300                                   u64 offset,
2301                                   enum i915_cache_level level,
2302                                   u32 unused)
2303 {
2304         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2305         gen8_pte_t __iomem *pte =
2306                 (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2307
2308         gen8_set_pte(pte, gen8_pte_encode(addr, level));
2309
2310         ggtt->invalidate(vm->i915);
2311 }
2312
2313 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2314                                      struct i915_vma *vma,
2315                                      enum i915_cache_level level,
2316                                      u32 unused)
2317 {
2318         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2319         struct sgt_iter sgt_iter;
2320         gen8_pte_t __iomem *gtt_entries;
2321         const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
2322         dma_addr_t addr;
2323
2324         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2325         gtt_entries += vma->node.start >> PAGE_SHIFT;
2326         for_each_sgt_dma(addr, sgt_iter, vma->pages)
2327                 gen8_set_pte(gtt_entries++, pte_encode | addr);
2328
2329         wmb();
2330
2331         /* This next bit makes the above posting read even more important. We
2332          * want to flush the TLBs only after we're certain all the PTE updates
2333          * have finished.
2334          */
2335         ggtt->invalidate(vm->i915);
2336 }
2337
2338 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2339                                   dma_addr_t addr,
2340                                   u64 offset,
2341                                   enum i915_cache_level level,
2342                                   u32 flags)
2343 {
2344         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2345         gen6_pte_t __iomem *pte =
2346                 (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2347
2348         iowrite32(vm->pte_encode(addr, level, flags), pte);
2349
2350         ggtt->invalidate(vm->i915);
2351 }
2352
2353 /*
2354  * Binds an object into the global gtt with the specified cache level. The object
2355  * will be accessible to the GPU via commands whose operands reference offsets
2356  * within the global GTT as well as accessible by the CPU through the GMADR
2357  * mapped BAR (dev_priv->mm.gtt->gtt).
2358  */
2359 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2360                                      struct i915_vma *vma,
2361                                      enum i915_cache_level level,
2362                                      u32 flags)
2363 {
2364         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2365         gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2366         unsigned int i = vma->node.start >> PAGE_SHIFT;
2367         struct sgt_iter iter;
2368         dma_addr_t addr;
2369         for_each_sgt_dma(addr, iter, vma->pages)
2370                 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2371         wmb();
2372
2373         /* This next bit makes the above posting read even more important. We
2374          * want to flush the TLBs only after we're certain all the PTE updates
2375          * have finished.
2376          */
2377         ggtt->invalidate(vm->i915);
2378 }
2379
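/*
 * Used as vm->clear_range where clearing GGTT ranges back to scratch is
 * unnecessary; which configurations take the no-op path is decided at
 * GGTT probe time.
 */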
2380 static void nop_clear_range(struct i915_address_space *vm,
2381                             u64 start, u64 length)
2382 {
2383 }
2384
2385 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2386                                   u64 start, u64 length)
2387 {
2388         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2389         unsigned first_entry = start >> PAGE_SHIFT;
2390         unsigned num_entries = length >> PAGE_SHIFT;
2391         const gen8_pte_t scratch_pte =
2392                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
2393         gen8_pte_t __iomem *gtt_base =
2394                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2395         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2396         int i;
2397
2398         if (WARN(num_entries > max_entries,
2399                  "First entry = %d; Num entries = %d (max=%d)\n",
2400                  first_entry, num_entries, max_entries))
2401                 num_entries = max_entries;
2402
2403         for (i = 0; i < num_entries; i++)
2404                 gen8_set_pte(&gtt_base[i], scratch_pte);
2405 }
2406
2407 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2408 {
2409         struct drm_i915_private *dev_priv = vm->i915;
2410
2411         /*
2412          * Make sure the internal GAM fifo has been cleared of all GTT
2413          * writes before exiting stop_machine(). This guarantees that
2414          * any aperture accesses waiting to start in another process
2415          * cannot back up behind the GTT writes causing a hang.
2416          * The register can be any arbitrary GAM register.
2417          */
2418         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2419 }
2420
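/*
 * The *__BKL helpers below funnel individual GGTT updates through
 * stop_machine() so that, with VT-d active on the affected platforms, no
 * other CPU can race an aperture access against the PTE write (see
 * bxt_vtd_ggtt_wa() above). The small structs merely package arguments
 * for the stop_machine() callbacks.
 */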
2421 struct insert_page {
2422         struct i915_address_space *vm;
2423         dma_addr_t addr;
2424         u64 offset;
2425         enum i915_cache_level level;
2426 };
2427
2428 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2429 {
2430         struct insert_page *arg = _arg;
2431
2432         gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2433         bxt_vtd_ggtt_wa(arg->vm);
2434
2435         return 0;
2436 }
2437
2438 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2439                                           dma_addr_t addr,
2440                                           u64 offset,
2441                                           enum i915_cache_level level,
2442                                           u32 unused)
2443 {
2444         struct insert_page arg = { vm, addr, offset, level };
2445
2446         stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2447 }
2448
2449 struct insert_entries {
2450         struct i915_address_space *vm;
2451         struct i915_vma *vma;
2452         enum i915_cache_level level;
2453 };
2454
2455 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2456 {
2457         struct insert_entries *arg = _arg;
2458
2459         gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
2460         bxt_vtd_ggtt_wa(arg->vm);
2461
2462         return 0;
2463 }
2464
2465 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2466                                              struct i915_vma *vma,
2467                                              enum i915_cache_level level,
2468                                              u32 unused)
2469 {
2470         struct insert_entries arg = { vm, vma, level };
2471
2472         stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2473 }
2474
2475 struct clear_range {
2476         struct i915_address_space *vm;
2477         u64 start;
2478         u64 length;
2479 };
2480
2481 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2482 {
2483         struct clear_range *arg = _arg;
2484
2485         gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2486         bxt_vtd_ggtt_wa(arg->vm);
2487
2488         return 0;
2489 }
2490
2491 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2492                                           u64 start,
2493                                           u64 length)
2494 {
2495         struct clear_range arg = { vm, start, length };
2496
2497         stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2498 }
2499
2500 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2501                                   u64 start, u64 length)
2502 {
2503         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2504         unsigned first_entry = start >> PAGE_SHIFT;
2505         unsigned num_entries = length >> PAGE_SHIFT;
2506         gen6_pte_t scratch_pte, __iomem *gtt_base =
2507                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2508         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2509         int i;
2510
2511         if (WARN(num_entries > max_entries,
2512                  "First entry = %d; Num entries = %d (max=%d)\n",
2513                  first_entry, num_entries, max_entries))
2514                 num_entries = max_entries;
2515
2516         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2517                                      I915_CACHE_LLC, 0);
2518
2519         for (i = 0; i < num_entries; i++)
2520                 iowrite32(scratch_pte, &gtt_base[i]);
2521 }
2522
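/*
 * The i915_ggtt_* callbacks below cover the old GMCH platforms and simply
 * forward to the intel-gtt helper library.
 */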
2523 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2524                                   dma_addr_t addr,
2525                                   u64 offset,
2526                                   enum i915_cache_level cache_level,
2527                                   u32 unused)
2528 {
2529         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2530                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2531
2532         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2533 }
2534
2535 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2536                                      struct i915_vma *vma,
2537                                      enum i915_cache_level cache_level,
2538                                      u32 unused)
2539 {
2540         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2541                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2542
2543         intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2544                                     flags);
2545 }
2546
2547 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2548                                   u64 start, u64 length)
2549 {
2550         intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2551 }
2552
2553 static int ggtt_bind_vma(struct i915_vma *vma,
2554                          enum i915_cache_level cache_level,
2555                          u32 flags)
2556 {
2557         struct drm_i915_private *i915 = vma->vm->i915;
2558         struct drm_i915_gem_object *obj = vma->obj;
2559         u32 pte_flags;
2560
2561         /* Currently applicable only to VLV */
2562         pte_flags = 0;
2563         if (obj->gt_ro)
2564                 pte_flags |= PTE_READ_ONLY;
2565
2566         intel_runtime_pm_get(i915);
2567         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2568         intel_runtime_pm_put(i915);
2569
2570         /*
2571          * Without aliasing PPGTT there's no difference between
2572          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2573          * upgrade to both bound if we bind either to avoid double-binding.
2574          */
2575         vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2576
2577         return 0;
2578 }
2579
2580 static void ggtt_unbind_vma(struct i915_vma *vma)
2581 {
2582         struct drm_i915_private *i915 = vma->vm->i915;
2583
2584         intel_runtime_pm_get(i915);
2585         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2586         intel_runtime_pm_put(i915);
2587 }
2588
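/*
 * With an aliasing PPGTT a GGTT vma can carry two bindings: a LOCAL bind
 * into the aliasing PPGTT (allocating its page tables on first use) and a
 * GLOBAL bind into the GGTT proper.
 */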
2589 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2590                                  enum i915_cache_level cache_level,
2591                                  u32 flags)
2592 {
2593         struct drm_i915_private *i915 = vma->vm->i915;
2594         u32 pte_flags;
2595         int ret;
2596
2597         /* Currently applicable only to VLV */
2598         pte_flags = 0;
2599         if (vma->obj->gt_ro)
2600                 pte_flags |= PTE_READ_ONLY;
2601
2602         if (flags & I915_VMA_LOCAL_BIND) {
2603                 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2604
2605                 if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
2606                     appgtt->base.allocate_va_range) {
2607                         ret = appgtt->base.allocate_va_range(&appgtt->base,
2608                                                              vma->node.start,
2609                                                              vma->size);
2610                         if (ret)
2611                                 return ret;
2612                 }
2613
2614                 appgtt->base.insert_entries(&appgtt->base, vma, cache_level,
2615                                             pte_flags);
2616         }
2617
2618         if (flags & I915_VMA_GLOBAL_BIND) {
2619                 intel_runtime_pm_get(i915);
2620                 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2621                 intel_runtime_pm_put(i915);
2622         }
2623
2624         return 0;
2625 }
2626
2627 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2628 {
2629         struct drm_i915_private *i915 = vma->vm->i915;
2630
2631         if (vma->flags & I915_VMA_GLOBAL_BIND) {
2632                 intel_runtime_pm_get(i915);
2633                 vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2634                 intel_runtime_pm_put(i915);
2635         }
2636
2637         if (vma->flags & I915_VMA_LOCAL_BIND) {
2638                 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;
2639
2640                 vm->clear_range(vm, vma->node.start, vma->size);
2641         }
2642 }
2643
2644 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2645                                struct sg_table *pages)
2646 {
2647         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2648         struct device *kdev = &dev_priv->drm.pdev->dev;
2649         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2650
2651         if (unlikely(ggtt->do_idle_maps)) {
2652                 if (i915_gem_wait_for_idle(dev_priv, 0)) {
2653                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2654                         /* Wait a bit, in hopes it avoids the hang */
2655                         udelay(10);
2656                 }
2657         }
2658
2659         dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2660 }
2661
2662 static int ggtt_set_pages(struct i915_vma *vma)
2663 {
2664         int ret;
2665
2666         GEM_BUG_ON(vma->pages);
2667
2668         ret = i915_get_ggtt_vma_pages(vma);
2669         if (ret)
2670                 return ret;
2671
2672         vma->page_sizes = vma->obj->mm.page_sizes;
2673
2674         return 0;
2675 }
2676
2677 static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2678                                   unsigned long color,
2679                                   u64 *start,
2680                                   u64 *end)
2681 {
2682         if (node->allocated && node->color != color)
2683                 *start += I915_GTT_PAGE_SIZE;
2684
2685         /* Also leave a space between the unallocated reserved node after the
2686          * GTT and any objects within the GTT, i.e. we use the color adjustment
2687          * to insert a guard page to prevent prefetches crossing over the
2688          * GTT boundary.
2689          */
2690         node = list_next_entry(node, node_list);
2691         if (node->color != color)
2692                 *end -= I915_GTT_PAGE_SIZE;
2693 }
2694
2695 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2696 {
2697         struct i915_ggtt *ggtt = &i915->ggtt;
2698         struct i915_hw_ppgtt *ppgtt;
2699         int err;
2700
2701         ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]");
2702         if (IS_ERR(ppgtt))
2703                 return PTR_ERR(ppgtt);
2704
2705         if (WARN_ON(ppgtt->base.total < ggtt->base.total)) {
2706                 err = -ENODEV;
2707                 goto err_ppgtt;
2708         }
2709
2710         if (ppgtt->base.allocate_va_range) {
2711                 /* Note we only pre-allocate as far as the end of the global
2712                  * GTT. On 48b / 4-level page-tables, the difference is very,
2713                  * very significant! We have to preallocate as GVT/vgpu does
2714                  * not like the page directory disappearing.
2715                  */
2716                 err = ppgtt->base.allocate_va_range(&ppgtt->base,
2717                                                     0, ggtt->base.total);
2718                 if (err)
2719                         goto err_ppgtt;
2720         }
2721
2722         i915->mm.aliasing_ppgtt = ppgtt;
2723
2724         WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2725         ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2726
2727         WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
2728         ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;
2729
2730         return 0;
2731
2732 err_ppgtt:
2733         i915_ppgtt_put(ppgtt);
2734         return err;
2735 }
2736
2737 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2738 {
2739         struct i915_ggtt *ggtt = &i915->ggtt;
2740         struct i915_hw_ppgtt *ppgtt;
2741
2742         ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2743         if (!ppgtt)
2744                 return;
2745
2746         i915_ppgtt_put(ppgtt);
2747
2748         ggtt->base.bind_vma = ggtt_bind_vma;
2749         ggtt->base.unbind_vma = ggtt_unbind_vma;
2750 }
2751
2752 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2753 {
2754         /* Let GEM manage all of the aperture.
2755          *
2756          * However, leave one page at the end still bound to the scratch page.
2757          * There are a number of places where the hardware apparently prefetches
2758          * past the end of the object, and we've seen multiple hangs with the
2759          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2760          * aperture.  One page should be enough to keep any prefetching inside
2761          * of the aperture.
2762          */
2763         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2764         unsigned long hole_start, hole_end;
2765         struct drm_mm_node *entry;
2766         int ret;
2767
2768         ret = intel_vgt_balloon(dev_priv);
2769         if (ret)
2770                 return ret;
2771
2772         /* Reserve a mappable slot for our lockless error capture */
2773         ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
2774                                           PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2775                                           0, ggtt->mappable_end,
2776                                           DRM_MM_INSERT_LOW);
2777         if (ret)
2778                 return ret;
2779
2780         /* Clear any non-preallocated blocks */
2781         drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2782                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2783                               hole_start, hole_end);
2784                 ggtt->base.clear_range(&ggtt->base, hole_start,
2785                                        hole_end - hole_start);
2786         }
2787
2788         /* And finally clear the reserved guard page */
2789         ggtt->base.clear_range(&ggtt->base,
2790                                ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
2791
2792         if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2793                 ret = i915_gem_init_aliasing_ppgtt(dev_priv);
2794                 if (ret)
2795                         goto err;
2796         }
2797
2798         return 0;
2799
2800 err:
2801         drm_mm_remove_node(&ggtt->error_capture);
2802         return ret;
2803 }
2804
2805 /**
2806  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2807  * @dev_priv: i915 device
2808  */
2809 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2810 {
2811         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2812         struct i915_vma *vma, *vn;
2813         struct pagevec *pvec;
2814
2815         ggtt->base.closed = true;
2816
2817         mutex_lock(&dev_priv->drm.struct_mutex);
2818         WARN_ON(!list_empty(&ggtt->base.active_list));
2819         list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link)
2820                 WARN_ON(i915_vma_unbind(vma));
2821         mutex_unlock(&dev_priv->drm.struct_mutex);
2822
2823         i915_gem_cleanup_stolen(&dev_priv->drm);
2824
2825         mutex_lock(&dev_priv->drm.struct_mutex);
2826         i915_gem_fini_aliasing_ppgtt(dev_priv);
2827
2828         if (drm_mm_node_allocated(&ggtt->error_capture))
2829                 drm_mm_remove_node(&ggtt->error_capture);
2830
2831         if (drm_mm_initialized(&ggtt->base.mm)) {
2832                 intel_vgt_deballoon(dev_priv);
2833                 i915_address_space_fini(&ggtt->base);
2834         }
2835
2836         ggtt->base.cleanup(&ggtt->base);
2837
2838         pvec = &dev_priv->mm.wc_stash;
2839         if (pvec->nr) {
2840                 set_pages_array_wb(pvec->pages, pvec->nr);
2841                 __pagevec_release(pvec);
2842         }
2843
2844         mutex_unlock(&dev_priv->drm.struct_mutex);
2845
2846         arch_phys_wc_del(ggtt->mtrr);
2847         io_mapping_fini(&ggtt->mappable);
2848 }
2849
2850 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
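/*
 * The helpers below decode the GMCH control word read from PCI config
 * space: GGMS selects the size of the GTT itself, GMS the amount of
 * stolen memory. As a worked example (gen6 case, sketch only): GGMS = 2
 * gives a 2MB GTT, i.e. 2MB / 4 bytes per PTE = 512K PTEs, which map
 * 512K * 4KB = 2GB of GGTT address space.
 */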
2851 {
2852         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2853         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2854         return snb_gmch_ctl << 20;
2855 }
2856
2857 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2858 {
2859         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2860         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2861         if (bdw_gmch_ctl)
2862                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2863
2864 #ifdef CONFIG_X86_32
2865         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2866         if (bdw_gmch_ctl > 4)
2867                 bdw_gmch_ctl = 4;
2868 #endif
2869
2870         return bdw_gmch_ctl << 20;
2871 }
2872
2873 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2874 {
2875         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2876         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2877
2878         if (gmch_ctrl)
2879                 return 1 << (20 + gmch_ctrl);
2880
2881         return 0;
2882 }
2883
2884 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2885 {
2886         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2887         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2888         return (size_t)snb_gmch_ctl << 25; /* 32 MB units */
2889 }
2890
2891 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2892 {
2893         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2894         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2895         return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */
2896 }
2897
2898 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2899 {
2900         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2901         gmch_ctrl &= SNB_GMCH_GMS_MASK;
2902
2903         /*
2904          * 0x0  to 0x10: 32MB increments starting at 0MB
2905          * 0x11 to 0x16: 4MB increments starting at 8MB
2906          * 0x17 to 0x1d: 4MB increments starting at 36MB
2907          */
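        /*
         * Worked examples (illustrative arithmetic only):
         *   0x10 -> 0x10 << 25              = 512MB
         *   0x11 -> (0x11 - 0x11 + 2) << 22 =   8MB
         *   0x17 -> (0x17 - 0x17 + 9) << 22 =  36MB
         */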
2908         if (gmch_ctrl < 0x11)
2909                 return (size_t)gmch_ctrl << 25;
2910         else if (gmch_ctrl < 0x17)
2911                 return (size_t)(gmch_ctrl - 0x11 + 2) << 22;
2912         else
2913                 return (size_t)(gmch_ctrl - 0x17 + 9) << 22;
2914 }
2915
2916 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2917 {
2918         gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2919         gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2920
2921         if (gen9_gmch_ctl < 0xf0)
2922                 return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */
2923         else
2924                 /* 4MB increments, with 0xf0 meaning 4MB */
2925                 return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22;
2926 }
2927
2928 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2929 {
2930         struct drm_i915_private *dev_priv = ggtt->base.i915;
2931         struct pci_dev *pdev = dev_priv->drm.pdev;
2932         phys_addr_t phys_addr;
2933         int ret;
2934
2935         /* For modern GENs the PTEs and register space are split in the BAR */
2936         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2937
2938         /*
2939          * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
2940          * will be dropped. For WC mappings in general we have 64 byte burst
2941          * writes when the WC buffer is flushed, so we can't use it, but have to
2942          * resort to an uncached mapping. The WC issue is easily caught by the
2943          * readback check when writing GTT PTE entries.
2944          */
2945         if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
2946                 ggtt->gsm = ioremap_nocache(phys_addr, size);
2947         else
2948                 ggtt->gsm = ioremap_wc(phys_addr, size);
2949         if (!ggtt->gsm) {
2950                 DRM_ERROR("Failed to map the ggtt page table\n");
2951                 return -ENOMEM;
2952         }
2953
2954         ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
2955         if (ret) {
2956                 DRM_ERROR("Scratch setup failed\n");
2957                 /* iounmap will also get called at remove, but meh */
2958                 iounmap(ggtt->gsm);
2959                 return ret;
2960         }
2961
2962         return 0;
2963 }
2964
2965 static struct intel_ppat_entry *
2966 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
2967 {
2968         struct intel_ppat_entry *entry = &ppat->entries[index];
2969
2970         GEM_BUG_ON(index >= ppat->max_entries);
2971         GEM_BUG_ON(test_bit(index, ppat->used));
2972
2973         entry->ppat = ppat;
2974         entry->value = value;
2975         kref_init(&entry->ref);
2976         set_bit(index, ppat->used);
2977         set_bit(index, ppat->dirty);
2978
2979         return entry;
2980 }
2981
2982 static void __free_ppat_entry(struct intel_ppat_entry *entry)
2983 {
2984         struct intel_ppat *ppat = entry->ppat;
2985         unsigned int index = entry - ppat->entries;
2986
2987         GEM_BUG_ON(index >= ppat->max_entries);
2988         GEM_BUG_ON(!test_bit(index, ppat->used));
2989
2990         entry->value = ppat->clear_value;
2991         clear_bit(index, ppat->used);
2992         set_bit(index, ppat->dirty);
2993 }
2994
2995 /**
2996  * intel_ppat_get - get a usable PPAT entry
2997  * @i915: i915 device instance
2998  * @value: the PPAT value required by the caller
2999  *
3000  * The function searches for an existing PPAT entry that matches the
3001  * required value. If one matches perfectly, that existing PPAT entry is
3002  * used. If it only matches partially, the function checks whether a free
3003  * PPAT index is available. If so, it allocates a new PPAT index for the
3004  * required entry and updates the HW. If not, the partially matched entry
3005  * is used.
3006  */
3007 const struct intel_ppat_entry *
3008 intel_ppat_get(struct drm_i915_private *i915, u8 value)
3009 {
3010         struct intel_ppat *ppat = &i915->ppat;
3011         struct intel_ppat_entry *entry;
3012         unsigned int scanned, best_score;
3013         int i;
3014
3015         GEM_BUG_ON(!ppat->max_entries);
3016
3017         scanned = best_score = 0;
3018         for_each_set_bit(i, ppat->used, ppat->max_entries) {
3019                 unsigned int score;
3020
3021                 score = ppat->match(ppat->entries[i].value, value);
3022                 if (score > best_score) {
3023                         entry = &ppat->entries[i];
3024                         if (score == INTEL_PPAT_PERFECT_MATCH) {
3025                                 kref_get(&entry->ref);
3026                                 return entry;
3027                         }
3028                         best_score = score;
3029                 }
3030                 scanned++;
3031         }
3032
3033         if (scanned == ppat->max_entries) {
3034                 if (!best_score)
3035                         return ERR_PTR(-ENOSPC);
3036
3037                 kref_get(&entry->ref);
3038                 return entry;
3039         }
3040
3041         i = find_first_zero_bit(ppat->used, ppat->max_entries);
3042         entry = __alloc_ppat_entry(ppat, i, value);
3043         ppat->update_hw(i915);
3044         return entry;
3045 }
3046
3047 static void release_ppat(struct kref *kref)
3048 {
3049         struct intel_ppat_entry *entry =
3050                 container_of(kref, struct intel_ppat_entry, ref);
3051         struct drm_i915_private *i915 = entry->ppat->i915;
3052
3053         __free_ppat_entry(entry);
3054         entry->ppat->update_hw(i915);
3055 }
3056
3057 /**
3058  * intel_ppat_put - put back a PPAT entry obtained from intel_ppat_get()
3059  * @entry: an intel PPAT entry
3060  *
3061  * Put back a PPAT entry obtained from intel_ppat_get(). If the PPAT index of
3062  * the entry was dynamically allocated, its reference count is decreased. Once
3063  * the reference count drops to zero, the PPAT index becomes free again.
3064  */
3065 void intel_ppat_put(const struct intel_ppat_entry *entry)
3066 {
3067         struct intel_ppat *ppat = entry->ppat;
3068         unsigned int index = entry - ppat->entries;
3069
3070         GEM_BUG_ON(!ppat->max_entries);
3071
3072         kref_put(&ppat->entries[index].ref, release_ppat);
3073 }
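
/*
 * Illustrative sketch (not part of the driver): how a caller could pair
 * intel_ppat_get() with intel_ppat_put(). The requested PPAT value and the
 * way the returned index would be consumed are assumptions made up for the
 * example.
 */
static void __maybe_unused example_ppat_usage(struct drm_i915_private *i915)
{
        const struct intel_ppat_entry *entry;
        unsigned int index;

        /* Ask for a writeback, LLC-cached, most-aged cache attribute. */
        entry = intel_ppat_get(i915, GEN8_PPAT_WB | GEN8_PPAT_LLC | GEN8_PPAT_AGE(3));
        if (IS_ERR(entry))
                return; /* -ENOSPC: table full and nothing even partially matched */

        /* The PPAT index to encode into PTEs is the entry's position. */
        index = entry - i915->ppat.entries;
        (void)index;

        /* Drop the reference once no PTE refers to this index any more. */
        intel_ppat_put(entry);
}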
3074
3075 static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3076 {
3077         struct intel_ppat *ppat = &dev_priv->ppat;
3078         int i;
3079
3080         for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3081                 I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3082                 clear_bit(i, ppat->dirty);
3083         }
3084 }
3085
3086 static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3087 {
3088         struct intel_ppat *ppat = &dev_priv->ppat;
3089         u64 pat = 0;
3090         int i;
3091
3092         for (i = 0; i < ppat->max_entries; i++)
3093                 pat |= GEN8_PPAT(i, ppat->entries[i].value);
3094
3095         bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3096
3097         I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3098         I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3099 }
3100
3101 static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3102 {
3103         unsigned int score = 0;
3104         enum {
3105                 AGE_MATCH = BIT(0),
3106                 TC_MATCH = BIT(1),
3107                 CA_MATCH = BIT(2),
3108         };
3109
3110         /* Cache attribute has to be matched. */
3111         if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3112                 return 0;
3113
3114         score |= CA_MATCH;
3115
3116         if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3117                 score |= TC_MATCH;
3118
3119         if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3120                 score |= AGE_MATCH;
3121
3122         if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3123                 return INTEL_PPAT_PERFECT_MATCH;
3124
3125         return score;
3126 }
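
/*
 * Illustrative sketch (not part of the driver): a worked example of the
 * partial-match scoring above. The requested value asks for WB + LLC +
 * AGE(3); comparing it against a cached WB + LLC/eLLC + AGE(3) entry keeps
 * the cache-attribute and age bits but loses the target-cache bit, so the
 * result is a non-zero partial score rather than INTEL_PPAT_PERFECT_MATCH.
 */
static unsigned int __maybe_unused example_ppat_score(void)
{
        const u8 want = GEN8_PPAT_WB | GEN8_PPAT_LLC | GEN8_PPAT_AGE(3);
        const u8 have = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);

        return bdw_private_pat_match(have, want);
}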
3127
3128 static unsigned int chv_private_pat_match(u8 src, u8 dst)
3129 {
3130         return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3131                 INTEL_PPAT_PERFECT_MATCH : 0;
3132 }
3133
3134 static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3135 {
3136         ppat->max_entries = 8;
3137         ppat->update_hw = cnl_private_pat_update_hw;
3138         ppat->match = bdw_private_pat_match;
3139         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3140
3141         /* XXX: spec is unclear if this is still needed for CNL+ */
3142         if (!USES_PPGTT(ppat->i915)) {
3143                 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3144                 return;
3145         }
3146
3147         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3148         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3149         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3150         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3151         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3152         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3153         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3154         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3155 }
3156
3157 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3158  * bits. When using advanced contexts each context stores its own PAT, but
3159  * writing this data shouldn't be harmful even in those cases. */
3160 static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3161 {
3162         ppat->max_entries = 8;
3163         ppat->update_hw = bdw_private_pat_update_hw;
3164         ppat->match = bdw_private_pat_match;
3165         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3166
3167         if (!USES_PPGTT(ppat->i915)) {
3168                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3169                  * so RTL will always use the value corresponding to
3170                  * pat_sel = 000".
3171                  * So let's disable cache for GGTT to avoid screen corruptions.
3172                  * MOCS still can be used though.
3173                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3174                  * before this patch, i.e. the same uncached + snooping access
3175                  * like on gen6/7 seems to be in effect.
3176                  * - So this just fixes blitter/render access. Again it looks
3177                  * like it's not just uncached access, but uncached + snooping.
3178                  * So we can still hold onto all our assumptions wrt cpu
3179                  * clflushing on LLC machines.
3180                  */
3181                 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3182                 return;
3183         }
3184
3185         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3186         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3187         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3188         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3189         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3190         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3191         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3192         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3193 }
3194
3195 static void chv_setup_private_ppat(struct intel_ppat *ppat)
3196 {
3197         ppat->max_entries = 8;
3198         ppat->update_hw = bdw_private_pat_update_hw;
3199         ppat->match = chv_private_pat_match;
3200         ppat->clear_value = CHV_PPAT_SNOOP;
3201
3202         /*
3203          * Map WB on BDW to snooped on CHV.
3204          *
3205          * Only the snoop bit has meaning for CHV, the rest is
3206          * ignored.
3207          *
3208          * The hardware will never snoop for certain types of accesses:
3209          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3210          * - PPGTT page tables
3211          * - some other special cycles
3212          *
3213          * As with BDW, we also need to consider the following for GT accesses:
3214          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3215          * so RTL will always use the value corresponding to
3216          * pat_sel = 000".
3217          * Which means we must set the snoop bit in PAT entry 0
3218          * in order to keep the global status page working.
3219          */
3220
3221         __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3222         __alloc_ppat_entry(ppat, 1, 0);
3223         __alloc_ppat_entry(ppat, 2, 0);
3224         __alloc_ppat_entry(ppat, 3, 0);
3225         __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3226         __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3227         __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3228         __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3229 }
3230
3231 static void gen6_gmch_remove(struct i915_address_space *vm)
3232 {
3233         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3234
3235         iounmap(ggtt->gsm);
3236         cleanup_scratch_page(vm);
3237 }
3238
3239 static void setup_private_pat(struct drm_i915_private *dev_priv)
3240 {
3241         struct intel_ppat *ppat = &dev_priv->ppat;
3242         int i;
3243
3244         ppat->i915 = dev_priv;
3245
3246         if (INTEL_GEN(dev_priv) >= 10)
3247                 cnl_setup_private_ppat(ppat);
3248         else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3249                 chv_setup_private_ppat(ppat);
3250         else
3251                 bdw_setup_private_ppat(ppat);
3252
3253         GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3254
3255         for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3256                 ppat->entries[i].value = ppat->clear_value;
3257                 ppat->entries[i].ppat = ppat;
3258                 set_bit(i, ppat->dirty);
3259         }
3260
3261         ppat->update_hw(dev_priv);
3262 }
3263
3264 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3265 {
3266         struct drm_i915_private *dev_priv = ggtt->base.i915;
3267         struct pci_dev *pdev = dev_priv->drm.pdev;
3268         unsigned int size;
3269         u16 snb_gmch_ctl;
3270         int err;
3271
3272         /* TODO: We're not aware of mappable constraints on gen8 yet */
3273         ggtt->mappable_base = pci_resource_start(pdev, 2);
3274         ggtt->mappable_end = pci_resource_len(pdev, 2);
3275
3276         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3277         if (!err)
3278                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3279         if (err)
3280                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3281
3282         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3283
3284         if (INTEL_GEN(dev_priv) >= 9) {
3285                 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3286                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3287         } else if (IS_CHERRYVIEW(dev_priv)) {
3288                 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3289                 size = chv_get_total_gtt_size(snb_gmch_ctl);
3290         } else {
3291                 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3292                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3293         }
3294
3295         ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3296         ggtt->base.cleanup = gen6_gmch_remove;
3297         ggtt->base.bind_vma = ggtt_bind_vma;
3298         ggtt->base.unbind_vma = ggtt_unbind_vma;
3299         ggtt->base.set_pages = ggtt_set_pages;
3300         ggtt->base.clear_pages = clear_pages;
3301         ggtt->base.insert_page = gen8_ggtt_insert_page;
3302         ggtt->base.clear_range = nop_clear_range;
3303         if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3304                 ggtt->base.clear_range = gen8_ggtt_clear_range;
3305
3306         ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3307
3308         /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3309         if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3310                 ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3311                 ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3312                 if (ggtt->base.clear_range != nop_clear_range)
3313                         ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3314         }
3315
3316         ggtt->invalidate = gen6_ggtt_invalidate;
3317
3318         setup_private_pat(dev_priv);
3319
3320         return ggtt_probe_common(ggtt, size);
3321 }
3322
3323 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3324 {
3325         struct drm_i915_private *dev_priv = ggtt->base.i915;
3326         struct pci_dev *pdev = dev_priv->drm.pdev;
3327         unsigned int size;
3328         u16 snb_gmch_ctl;
3329         int err;
3330
3331         ggtt->mappable_base = pci_resource_start(pdev, 2);
3332         ggtt->mappable_end = pci_resource_len(pdev, 2);
3333
3334         /* 64/512MB is the current min/max we actually know of, but this is just
3335          * a coarse sanity check.
3336          */
3337         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3338                 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3339                 return -ENXIO;
3340         }
3341
3342         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3343         if (!err)
3344                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3345         if (err)
3346                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3347         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3348
3349         ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3350
3351         size = gen6_get_total_gtt_size(snb_gmch_ctl);
3352         ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3353
3354         ggtt->base.clear_range = gen6_ggtt_clear_range;
3355         ggtt->base.insert_page = gen6_ggtt_insert_page;
3356         ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3357         ggtt->base.bind_vma = ggtt_bind_vma;
3358         ggtt->base.unbind_vma = ggtt_unbind_vma;
3359         ggtt->base.set_pages = ggtt_set_pages;
3360         ggtt->base.clear_pages = clear_pages;
3361         ggtt->base.cleanup = gen6_gmch_remove;
3362
3363         ggtt->invalidate = gen6_ggtt_invalidate;
3364
3365         if (HAS_EDRAM(dev_priv))
3366                 ggtt->base.pte_encode = iris_pte_encode;
3367         else if (IS_HASWELL(dev_priv))
3368                 ggtt->base.pte_encode = hsw_pte_encode;
3369         else if (IS_VALLEYVIEW(dev_priv))
3370                 ggtt->base.pte_encode = byt_pte_encode;
3371         else if (INTEL_GEN(dev_priv) >= 7)
3372                 ggtt->base.pte_encode = ivb_pte_encode;
3373         else
3374                 ggtt->base.pte_encode = snb_pte_encode;
3375
3376         return ggtt_probe_common(ggtt, size);
3377 }
3378
3379 static void i915_gmch_remove(struct i915_address_space *vm)
3380 {
3381         intel_gmch_remove();
3382 }
3383
3384 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3385 {
3386         struct drm_i915_private *dev_priv = ggtt->base.i915;
3387         int ret;
3388
3389         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3390         if (!ret) {
3391                 DRM_ERROR("failed to set up gmch\n");
3392                 return -EIO;
3393         }
3394
3395         intel_gtt_get(&ggtt->base.total,
3396                       &ggtt->stolen_size,
3397                       &ggtt->mappable_base,
3398                       &ggtt->mappable_end);
3399
3400         ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3401         ggtt->base.insert_page = i915_ggtt_insert_page;
3402         ggtt->base.insert_entries = i915_ggtt_insert_entries;
3403         ggtt->base.clear_range = i915_ggtt_clear_range;
3404         ggtt->base.bind_vma = ggtt_bind_vma;
3405         ggtt->base.unbind_vma = ggtt_unbind_vma;
3406         ggtt->base.set_pages = ggtt_set_pages;
3407         ggtt->base.clear_pages = clear_pages;
3408         ggtt->base.cleanup = i915_gmch_remove;
3409
3410         ggtt->invalidate = gmch_ggtt_invalidate;
3411
3412         if (unlikely(ggtt->do_idle_maps))
3413                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3414
3415         return 0;
3416 }
3417
3418 /**
3419  * i915_ggtt_probe_hw - Probe GGTT hardware location
3420  * @dev_priv: i915 device
3421  */
3422 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3423 {
3424         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3425         int ret;
3426
3427         ggtt->base.i915 = dev_priv;
3428         ggtt->base.dma = &dev_priv->drm.pdev->dev;
3429
3430         if (INTEL_GEN(dev_priv) <= 5)
3431                 ret = i915_gmch_probe(ggtt);
3432         else if (INTEL_GEN(dev_priv) < 8)
3433                 ret = gen6_gmch_probe(ggtt);
3434         else
3435                 ret = gen8_gmch_probe(ggtt);
3436         if (ret)
3437                 return ret;
3438
3439         /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
3440          * This is easier than doing range restriction on the fly, as we
3441          * currently don't have any bits spare to pass in this upper
3442          * restriction!
3443          */
3444         if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) {
3445                 ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP);
3446                 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3447         }
3448
3449         if ((ggtt->base.total - 1) >> 32) {
3450                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3451                           " of address space! Found %lldM!\n",
3452                           ggtt->base.total >> 20);
3453                 ggtt->base.total = 1ULL << 32;
3454                 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3455         }
3456
3457         if (ggtt->mappable_end > ggtt->base.total) {
3458                 DRM_ERROR("mappable aperture extends past end of GGTT,"
3459                           " aperture=%llx, total=%llx\n",
3460                           ggtt->mappable_end, ggtt->base.total);
3461                 ggtt->mappable_end = ggtt->base.total;
3462         }
3463
3464         /* GMADR is the PCI mmio aperture into the global GTT. */
3465         DRM_INFO("Memory usable by graphics device = %lluM\n",
3466                  ggtt->base.total >> 20);
3467         DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3468         DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20);
3469         if (intel_vtd_active())
3470                 DRM_INFO("VT-d active for gfx access\n");
3471
3472         return 0;
3473 }
3474
3475 /**
3476  * i915_ggtt_init_hw - Initialize GGTT hardware
3477  * @dev_priv: i915 device
3478  */
3479 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3480 {
3481         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3482         int ret;
3483
3484         INIT_LIST_HEAD(&dev_priv->vm_list);
3485
3486         /* Note that we use page colouring to enforce a guard page at the
3487          * end of the address space. This is required as the CS may prefetch
3488          * beyond the end of the batch buffer, across the page boundary,
3489          * and beyond the end of the GTT if we do not provide a guard.
3490          */
3491         mutex_lock(&dev_priv->drm.struct_mutex);
3492         i915_address_space_init(&ggtt->base, dev_priv, "[global]");
3493         if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
3494                 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3495         mutex_unlock(&dev_priv->drm.struct_mutex);
3496
3497         if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
3498                                 dev_priv->ggtt.mappable_base,
3499                                 dev_priv->ggtt.mappable_end)) {
3500                 ret = -EIO;
3501                 goto out_gtt_cleanup;
3502         }
3503
3504         ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);
3505
3506         /*
3507          * Initialise stolen early so that we may reserve preallocated
3508          * objects for the BIOS to KMS transition.
3509          */
3510         ret = i915_gem_init_stolen(dev_priv);
3511         if (ret)
3512                 goto out_gtt_cleanup;
3513
3514         return 0;
3515
3516 out_gtt_cleanup:
3517         ggtt->base.cleanup(&ggtt->base);
3518         return ret;
3519 }
3520
3521 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3522 {
3523         if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3524                 return -EIO;
3525
3526         return 0;
3527 }
3528
3529 void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3530 {
3531         GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3532
3533         i915->ggtt.invalidate = guc_ggtt_invalidate;
3534 }
3535
3536 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3537 {
3538         /* We should only be called after i915_ggtt_enable_guc() */
3539         GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3540
3541         i915->ggtt.invalidate = gen6_ggtt_invalidate;
3542 }
3543
3544 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3545 {
3546         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3547         struct drm_i915_gem_object *obj, *on;
3548
3549         i915_check_and_clear_faults(dev_priv);
3550
3551         /* First fill our portion of the GTT with scratch pages */
3552         ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
3553
3554         ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
3555
3556         /* clflush objects bound into the GGTT and rebind them. */
3557         list_for_each_entry_safe(obj, on,
3558                                  &dev_priv->mm.bound_list, global_link) {
3559                 bool ggtt_bound = false;
3560                 struct i915_vma *vma;
3561
3562                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3563                         if (vma->vm != &ggtt->base)
3564                                 continue;
3565
3566                         if (!i915_vma_unbind(vma))
3567                                 continue;
3568
3569                         WARN_ON(i915_vma_bind(vma, obj->cache_level,
3570                                               PIN_UPDATE));
3571                         ggtt_bound = true;
3572                 }
3573
3574                 if (ggtt_bound)
3575                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3576         }
3577
3578         ggtt->base.closed = false;
3579
3580         if (INTEL_GEN(dev_priv) >= 8) {
3581                 struct intel_ppat *ppat = &dev_priv->ppat;
3582
3583                 bitmap_set(ppat->dirty, 0, ppat->max_entries);
3584                 dev_priv->ppat.update_hw(dev_priv);
3585                 return;
3586         }
3587
3588         if (USES_PPGTT(dev_priv)) {
3589                 struct i915_address_space *vm;
3590
3591                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3592                         struct i915_hw_ppgtt *ppgtt;
3593
3594                         if (i915_is_ggtt(vm))
3595                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
3596                         else
3597                                 ppgtt = i915_vm_to_ppgtt(vm);
3598
3599                         gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
3600                 }
3601         }
3602
3603         i915_ggtt_invalidate(dev_priv);
3604 }
3605
3606 static struct scatterlist *
3607 rotate_pages(const dma_addr_t *in, unsigned int offset,
3608              unsigned int width, unsigned int height,
3609              unsigned int stride,
3610              struct sg_table *st, struct scatterlist *sg)
3611 {
3612         unsigned int column, row;
3613         unsigned int src_idx;
3614
3615         for (column = 0; column < width; column++) {
3616                 src_idx = stride * (height - 1) + column;
3617                 for (row = 0; row < height; row++) {
3618                         st->nents++;
3619                         /* We don't need the pages, but need to initialize
3620                          * the entries so the sg list can be happily traversed.
3621                         * All we need are the DMA addresses.
3622                          */
3623                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3624                         sg_dma_address(sg) = in[offset + src_idx];
3625                         sg_dma_len(sg) = PAGE_SIZE;
3626                         sg = sg_next(sg);
3627                         src_idx -= stride;
3628                 }
3629         }
3630
3631         return sg;
3632 }
3633
3634 static noinline struct sg_table *
3635 intel_rotate_pages(struct intel_rotation_info *rot_info,
3636                    struct drm_i915_gem_object *obj)
3637 {
3638         const unsigned long n_pages = obj->base.size / PAGE_SIZE;
3639         unsigned int size = intel_rotation_info_size(rot_info);
3640         struct sgt_iter sgt_iter;
3641         dma_addr_t dma_addr;
3642         unsigned long i;
3643         dma_addr_t *page_addr_list;
3644         struct sg_table *st;
3645         struct scatterlist *sg;
3646         int ret = -ENOMEM;
3647
3648         /* Allocate a temporary list of source pages for random access. */
3649         page_addr_list = kvmalloc_array(n_pages,
3650                                         sizeof(dma_addr_t),
3651                                         GFP_KERNEL);
3652         if (!page_addr_list)
3653                 return ERR_PTR(ret);
3654
3655         /* Allocate target SG list. */
3656         st = kmalloc(sizeof(*st), GFP_KERNEL);
3657         if (!st)
3658                 goto err_st_alloc;
3659
3660         ret = sg_alloc_table(st, size, GFP_KERNEL);
3661         if (ret)
3662                 goto err_sg_alloc;
3663
3664         /* Populate source page list from the object. */
3665         i = 0;
3666         for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
3667                 page_addr_list[i++] = dma_addr;
3668
3669         GEM_BUG_ON(i != n_pages);
3670         st->nents = 0;
3671         sg = st->sgl;
3672
3673         for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3674                 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3675                                   rot_info->plane[i].width, rot_info->plane[i].height,
3676                                   rot_info->plane[i].stride, st, sg);
3677         }
3678
3679         DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
3680                       obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3681
3682         kvfree(page_addr_list);
3683
3684         return st;
3685
3686 err_sg_alloc:
3687         kfree(st);
3688 err_st_alloc:
3689         kvfree(page_addr_list);
3690
3691         DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3692                       obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3693
3694         return ERR_PTR(ret);
3695 }
3696
3697 static noinline struct sg_table *
3698 intel_partial_pages(const struct i915_ggtt_view *view,
3699                     struct drm_i915_gem_object *obj)
3700 {
3701         struct sg_table *st;
3702         struct scatterlist *sg, *iter;
3703         unsigned int count = view->partial.size;
3704         unsigned int offset;
3705         int ret = -ENOMEM;
3706
3707         st = kmalloc(sizeof(*st), GFP_KERNEL);
3708         if (!st)
3709                 goto err_st_alloc;
3710
3711         ret = sg_alloc_table(st, count, GFP_KERNEL);
3712         if (ret)
3713                 goto err_sg_alloc;
3714
3715         iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3716         GEM_BUG_ON(!iter);
3717
3718         sg = st->sgl;
3719         st->nents = 0;
3720         do {
3721                 unsigned int len;
3722
3723                 len = min(iter->length - (offset << PAGE_SHIFT),
3724                           count << PAGE_SHIFT);
3725                 sg_set_page(sg, NULL, len, 0);
3726                 sg_dma_address(sg) =
3727                         sg_dma_address(iter) + (offset << PAGE_SHIFT);
3728                 sg_dma_len(sg) = len;
3729
3730                 st->nents++;
3731                 count -= len >> PAGE_SHIFT;
3732                 if (count == 0) {
3733                         sg_mark_end(sg);
3734                         return st;
3735                 }
3736
3737                 sg = __sg_next(sg);
3738                 iter = __sg_next(iter);
3739                 offset = 0;
3740         } while (1);
3741
3742 err_sg_alloc:
3743         kfree(st);
3744 err_st_alloc:
3745         return ERR_PTR(ret);
3746 }
3747
3748 static int
3749 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3750 {
3751         int ret;
3752
3753         /* The vma->pages are only valid within the lifespan of the borrowed
3754          * obj->mm.pages. Whenever the obj->mm.pages sg_table is regenerated,
3755          * vma->pages must be regenerated too. A simple rule is that vma->pages must only
3756          * be accessed when the obj->mm.pages are pinned.
3757          */
3758         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3759
3760         switch (vma->ggtt_view.type) {
3761         case I915_GGTT_VIEW_NORMAL:
3762                 vma->pages = vma->obj->mm.pages;
3763                 return 0;
3764
3765         case I915_GGTT_VIEW_ROTATED:
3766                 vma->pages =
3767                         intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3768                 break;
3769
3770         case I915_GGTT_VIEW_PARTIAL:
3771                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3772                 break;
3773
3774         default:
3775                 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3776                           vma->ggtt_view.type);
3777                 return -EINVAL;
3778         }
3779
3780         ret = 0;
3781         if (unlikely(IS_ERR(vma->pages))) {
3782                 ret = PTR_ERR(vma->pages);
3783                 vma->pages = NULL;
3784                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3785                           vma->ggtt_view.type, ret);
3786         }
3787         return ret;
3788 }
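
/*
 * Illustrative sketch (not part of the driver): the vma->pages lifetime rule
 * described above in practice, i.e. keeping obj->mm.pages pinned for as long
 * as vma->pages is dereferenced. The helper name is made up for the example.
 */
static void __maybe_unused example_walk_vma_pages(struct i915_vma *vma)
{
        if (i915_gem_object_pin_pages(vma->obj))
                return;

        /* vma->pages (and the sg_table it borrows) is now safe to walk. */

        i915_gem_object_unpin_pages(vma->obj);
}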
3789
3790 /**
3791  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3792  * @vm: the &struct i915_address_space
3793  * @node: the &struct drm_mm_node (typically i915_vma.node)
3794  * @size: how much space to allocate inside the GTT,
3795  *        must be #I915_GTT_PAGE_SIZE aligned
3796  * @offset: where to insert inside the GTT,
3797  *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3798  *          (@offset + @size) must fit within the address space
3799  * @color: color to apply to node, if this node is not from a VMA,
3800  *         color must be #I915_COLOR_UNEVICTABLE
3801  * @flags: control search and eviction behaviour
3802  *
3803  * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3804  * the address space (using @size and @color). If the @node does not fit, it
3805  * tries to evict any overlapping nodes from the GTT, including any
3806  * neighbouring nodes if the colors do not match (to ensure guard pages between
3807  * differing domains). See i915_gem_evict_for_node() for the gory details
3808  * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
3809  * evicting active overlapping objects, and any overlapping node that is pinned
3810  * or marked as unevictable will also result in failure.
3811  *
3812  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3813  * asked to wait for eviction and interrupted.
3814  */
3815 int i915_gem_gtt_reserve(struct i915_address_space *vm,
3816                          struct drm_mm_node *node,
3817                          u64 size, u64 offset, unsigned long color,
3818                          unsigned int flags)
3819 {
3820         int err;
3821
3822         GEM_BUG_ON(!size);
3823         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3824         GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3825         GEM_BUG_ON(range_overflows(offset, size, vm->total));
3826         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
3827         GEM_BUG_ON(drm_mm_node_allocated(node));
3828
3829         node->size = size;
3830         node->start = offset;
3831         node->color = color;
3832
3833         err = drm_mm_reserve_node(&vm->mm, node);
3834         if (err != -ENOSPC)
3835                 return err;
3836
3837         if (flags & PIN_NOEVICT)
3838                 return -ENOSPC;
3839
3840         err = i915_gem_evict_for_node(vm, node, flags);
3841         if (err == 0)
3842                 err = drm_mm_reserve_node(&vm->mm, node);
3843
3844         return err;
3845 }
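
/*
 * Illustrative sketch (not part of the driver): reserving a caller-owned node
 * at a fixed GGTT offset. The chosen offset, size and flags are assumptions
 * made up for the example; struct_mutex must be held as for any eviction.
 */
static int __maybe_unused example_gtt_reserve(struct i915_ggtt *ggtt,
                                              struct drm_mm_node *node)
{
        /* Claim one GTT page at offset 4096, evicting any overlap if needed. */
        return i915_gem_gtt_reserve(&ggtt->base, node,
                                    I915_GTT_PAGE_SIZE, I915_GTT_PAGE_SIZE,
                                    I915_COLOR_UNEVICTABLE, 0);
}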
3846
3847 static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
3848 {
3849         u64 range, addr;
3850
3851         GEM_BUG_ON(range_overflows(start, len, end));
3852         GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
3853
3854         range = round_down(end - len, align) - round_up(start, align);
3855         if (range) {
3856                 if (sizeof(unsigned long) == sizeof(u64)) {
3857                         addr = get_random_long();
3858                 } else {
3859                         addr = get_random_int();
3860                         if (range > U32_MAX) {
3861                                 addr <<= 32;
3862                                 addr |= get_random_int();
3863                         }
3864                 }
3865                 div64_u64_rem(addr, range, &addr);
3866                 start += addr;
3867         }
3868
3869         return round_up(start, align);
3870 }
3871
3872 /**
3873  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
3874  * @vm: the &struct i915_address_space
3875  * @node: the &struct drm_mm_node (typically i915_vma.node)
3876  * @size: how much space to allocate inside the GTT,
3877  *        must be #I915_GTT_PAGE_SIZE aligned
3878  * @alignment: required alignment of starting offset, may be 0 but
3879  *             if specified, this must be a power-of-two and at least
3880  *             #I915_GTT_MIN_ALIGNMENT
3881  * @color: color to apply to node
3882  * @start: start of any range restriction inside GTT (0 for all),
3883  *         must be #I915_GTT_PAGE_SIZE aligned
3884  * @end: end of any range restriction inside GTT (U64_MAX for all),
3885  *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
3886  * @flags: control search and eviction behaviour
3887  *
3888  * i915_gem_gtt_insert() first searches for an available hole into which
3889  * it can insert the node. The hole address is aligned to @alignment and
3890  * its @size must then fit entirely within the [@start, @end] bounds. The
3891  * nodes on either side of the hole must match @color, or else a guard page
3892  * will be inserted between the two nodes (or the node evicted). If no
3893  * suitable hole is found, a victim is first selected at random and tested
3894  * for eviction; failing that, the LRU list of objects within the GTT
3895  * is scanned to find the first set of replacement nodes to create the hole.
3896  * Those old overlapping nodes are evicted from the GTT (and so must be
3897  * rebound before any future use). Any node that is currently pinned cannot
3898  * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
3899  * active and #PIN_NONBLOCK is specified, that node is also skipped when
3900  * searching for an eviction candidate. See i915_gem_evict_something() for
3901  * the gory details on the eviction algorithm.
3902  *
3903  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3904  * asked to wait for eviction and interrupted.
3905  */
3906 int i915_gem_gtt_insert(struct i915_address_space *vm,
3907                         struct drm_mm_node *node,
3908                         u64 size, u64 alignment, unsigned long color,
3909                         u64 start, u64 end, unsigned int flags)
3910 {
3911         enum drm_mm_insert_mode mode;
3912         u64 offset;
3913         int err;
3914
3915         lockdep_assert_held(&vm->i915->drm.struct_mutex);
3916         GEM_BUG_ON(!size);
3917         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3918         GEM_BUG_ON(alignment && !is_power_of_2(alignment));
3919         GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
3920         GEM_BUG_ON(start >= end);
3921         GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
3922         GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
3923         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
3924         GEM_BUG_ON(drm_mm_node_allocated(node));
3925
3926         if (unlikely(range_overflows(start, size, end)))
3927                 return -ENOSPC;
3928
3929         if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
3930                 return -ENOSPC;
3931
3932         mode = DRM_MM_INSERT_BEST;
3933         if (flags & PIN_HIGH)
3934                 mode = DRM_MM_INSERT_HIGH;
3935         if (flags & PIN_MAPPABLE)
3936                 mode = DRM_MM_INSERT_LOW;
3937
3938         /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3939          * so we know that we always have a minimum alignment of 4096.
3940          * The drm_mm range manager is optimised to return results
3941          * with zero alignment, so where possible use the optimal
3942          * path.
3943          */
3944         BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
3945         if (alignment <= I915_GTT_MIN_ALIGNMENT)
3946                 alignment = 0;
3947
3948         err = drm_mm_insert_node_in_range(&vm->mm, node,
3949                                           size, alignment, color,
3950                                           start, end, mode);
3951         if (err != -ENOSPC)
3952                 return err;
3953
3954         if (flags & PIN_NOEVICT)
3955                 return -ENOSPC;
3956
3957         /* No free space, pick a slot at random.
3958          *
3959          * There is a pathological case here using a GTT shared between
3960          * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
3961          *
3962          *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
3963          *         (64k objects)             (448k objects)
3964          *
3965          * Now imagine that the eviction LRU is ordered top-down (just because
3966          * pathology meets real life), and that we need to evict an object to
3967          * make room inside the aperture. The eviction scan then has to walk
3968          * the 448k list before it finds one within range. And now imagine that
3969          * it has to search for a new hole between every byte inside the memcpy,
3970          * for several simultaneous clients.
3971          *
3972          * On a full-ppgtt system, if we have run out of available space, there
3973          * will be lots and lots of objects in the eviction list! Again,
3974          * searching that LRU list may be slow if we are also applying any
3975          * range restrictions (e.g. restriction to low 4GiB) and so, for
3976          * simplicity and similarity between different GTTs, try the single
3977          * random replacement first.
3978          */
3979         offset = random_offset(start, end,
3980                                size, alignment ?: I915_GTT_MIN_ALIGNMENT);
3981         err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
3982         if (err != -ENOSPC)
3983                 return err;
3984
3985         /* Randomly selected placement is pinned, do a search */
3986         err = i915_gem_evict_something(vm, size, alignment, color,
3987                                        start, end, flags);
3988         if (err)
3989                 return err;
3990
3991         return drm_mm_insert_node_in_range(&vm->mm, node,
3992                                            size, alignment, color,
3993                                            start, end, DRM_MM_INSERT_EVICT);
3994 }
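
/*
 * Illustrative sketch (not part of the driver): letting the allocator pick a
 * slot anywhere in the GGTT. The size and the caller-owned node are made up
 * for the example; struct_mutex must be held, as asserted above.
 */
static int __maybe_unused example_gtt_insert(struct i915_ggtt *ggtt,
                                             struct drm_mm_node *node)
{
        /* One GTT page, default alignment, no range restriction, no flags. */
        return i915_gem_gtt_insert(&ggtt->base, node,
                                   I915_GTT_PAGE_SIZE, 0,
                                   I915_COLOR_UNEVICTABLE,
                                   0, ggtt->base.total, 0);
}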
3995
3996 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3997 #include "selftests/mock_gtt.c"
3998 #include "selftests/i915_gem_gtt.c"
3999 #endif